On 17/11/15 23:20, Tom de Vries wrote:
[ was: Re: [PATCH, 10/16] Add pass_oacc_kernels pass group in passes.def ]
Hi,
Consider test-case test.c, with a use of the final value of the
iteration variable (return i):
...
unsigned int
foo (int *a, unsigned int n)
{
unsigned int i;
for (i = 0; i < n; ++i)
a[i] = 1;
return i;
}
...
Compiled with:
...
$ gcc -S -O2 test.c -ftree-parallelize-loops=2 -fdump-tree-all-details
...
Before parloops, we have:
...
<bb 4>:
# i_12 = PHI <0(3), i_10(5)>
_5 = (long unsigned int) i_12;
_6 = _5 * 4;
_8 = a_7(D) + _6;
*_8 = 1;
i_10 = i_12 + 1;
if (n_4(D) > i_10)
goto <bb 5>;
else
goto <bb 6>;
<bb 5>:
goto <bb 4>;
<bb 6>:
# i_14 = PHI <n_4(D)(4), 0(2)>
...
Parloops will fail because:
...
phi is n_2 = PHI <n_4(D)(4)>
arg of phi to exit: value n_4(D) used outside loop
checking if it a part of reduction pattern:
FAILED: it is not a part of reduction....
...
[ note that the phi looks slightly different. In
gather_scalar_reductions -> vect_analyze_loop_form ->
vect_analyze_loop_form_1 -> split_loop_exit_edge we split the edge from
bb4 to bb6. ]
This patch uses scev_const_prop at the start of parloops.
scev_const_prop first also splits the exit edge, and then replaces the
phi with a assignment:
...
final value replacement:
n_2 = PHI <n_4(D)(4)>
with
n_2 = n_4(D);
...
This allows parloops to succeed.
And there's a similar story when we compile with -fno-tree-scev-cprop in
addition.
Bootstrapped and reg-tested on x86_64.
OK for stage3/stage1?
The patch has been updated to do the final value replacement only for
the loop that parloops is processing, as suggested in review comment at
https://gcc.gnu.org/ml/gcc-patches/2015-11/msg02166.html .
That means the patch is now also required for the kernels patch series.
Bootstrapped and reg-tested on x86_64.
OK for stage 3 trunk?
Thanks,
- Tom
Do final value replacement in try_create_reduction_list
2015-11-18 Tom de Vries <t...@codesourcery.com>
* tree-scalar-evolution.c (final_value_replacement_loop): Factor out of ...
(scev_const_prop): ... here.
* tree-scalar-evolution.h (final_value_replacement_loop): Declare.
* tree-parloops.c (try_create_reduction_list): Call
final_value_replacement_loop.
* gcc.dg/autopar/pr68373.c: New test.
---
gcc/testsuite/gcc.dg/autopar/pr68373.c | 14 ++
gcc/tree-parloops.c | 3 +
gcc/tree-scalar-evolution.c | 248 +++++++++++++++++----------------
gcc/tree-scalar-evolution.h | 1 +
4 files changed, 145 insertions(+), 121 deletions(-)
diff --git a/gcc/testsuite/gcc.dg/autopar/pr68373.c b/gcc/testsuite/gcc.dg/autopar/pr68373.c
new file mode 100644
index 0000000..8e0f8a5
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/autopar/pr68373.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-parallelize-loops=2 -fdump-tree-parloops-details" } */
+
+unsigned int
+foo (int *a, unsigned int n)
+{
+ unsigned int i;
+ for (i = 0; i < n; ++i)
+ a[i] = 1;
+
+ return i;
+}
+
+/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops" } } */
diff --git a/gcc/tree-parloops.c b/gcc/tree-parloops.c
index 17415a8..8d7912d 100644
--- a/gcc/tree-parloops.c
+++ b/gcc/tree-parloops.c
@@ -2539,6 +2539,9 @@ try_create_reduction_list (loop_p loop,
gcc_assert (exit);
+ /* Try to get rid of exit phis. */
+ final_value_replacement_loop (loop);
+
gather_scalar_reductions (loop, reduction_list);
diff --git a/gcc/tree-scalar-evolution.c b/gcc/tree-scalar-evolution.c
index 27630f0..9b33693 100644
--- a/gcc/tree-scalar-evolution.c
+++ b/gcc/tree-scalar-evolution.c
@@ -3417,6 +3417,131 @@ expression_expensive_p (tree expr)
}
}
+/* Do final value replacement for LOOP. */
+
+void
+final_value_replacement_loop (struct loop *loop)
+{
+ /* If we do not know exact number of iterations of the loop, we cannot
+ replace the final value. */
+ edge exit = single_exit (loop);
+ if (!exit)
+ return;
+
+ tree niter = number_of_latch_executions (loop);
+ if (niter == chrec_dont_know)
+ return;
+
+ /* Ensure that it is possible to insert new statements somewhere. */
+ if (!single_pred_p (exit->dest))
+ split_loop_exit_edge (exit);
+
+ /* Set stmt insertion pointer. All stmts are inserted before this point. */
+ gimple_stmt_iterator gsi = gsi_after_labels (exit->dest);
+
+ struct loop *ex_loop
+ = superloop_at_depth (loop,
+ loop_depth (exit->dest->loop_father) + 1);
+
+ gphi_iterator psi;
+ for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); )
+ {
+ gphi *phi = psi.phi ();
+ tree rslt = PHI_RESULT (phi);
+ tree def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
+ if (virtual_operand_p (def))
+ {
+ gsi_next (&psi);
+ continue;
+ }
+
+ if (!POINTER_TYPE_P (TREE_TYPE (def))
+ && !INTEGRAL_TYPE_P (TREE_TYPE (def)))
+ {
+ gsi_next (&psi);
+ continue;
+ }
+
+ bool folded_casts;
+ def = analyze_scalar_evolution_in_loop (ex_loop, loop, def,
+ &folded_casts);
+ def = compute_overall_effect_of_inner_loop (ex_loop, def);
+ if (!tree_does_not_contain_chrecs (def)
+ || chrec_contains_symbols_defined_in_loop (def, ex_loop->num)
+ /* Moving the computation from the loop may prolong life range
+ of some ssa names, which may cause problems if they appear
+ on abnormal edges. */
+ || contains_abnormal_ssa_name_p (def)
+ /* Do not emit expensive expressions. The rationale is that
+ when someone writes a code like
+
+ while (n > 45) n -= 45;
+
+ he probably knows that n is not large, and does not want it
+ to be turned into n %= 45. */
+ || expression_expensive_p (def))
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "not replacing:\n ");
+ print_gimple_stmt (dump_file, phi, 0, 0);
+ fprintf (dump_file, "\n");
+ }
+ gsi_next (&psi);
+ continue;
+ }
+
+ /* Eliminate the PHI node and replace it by a computation outside
+ the loop. */
+ if (dump_file)
+ {
+ fprintf (dump_file, "\nfinal value replacement:\n ");
+ print_gimple_stmt (dump_file, phi, 0, 0);
+ fprintf (dump_file, " with\n ");
+ }
+ def = unshare_expr (def);
+ remove_phi_node (&psi, false);
+
+ /* If def's type has undefined overflow and there were folded
+ casts, rewrite all stmts added for def into arithmetics
+ with defined overflow behavior. */
+ if (folded_casts && ANY_INTEGRAL_TYPE_P (TREE_TYPE (def))
+ && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (def)))
+ {
+ gimple_seq stmts;
+ gimple_stmt_iterator gsi2;
+ def = force_gimple_operand (def, &stmts, true, NULL_TREE);
+ gsi2 = gsi_start (stmts);
+ while (!gsi_end_p (gsi2))
+ {
+ gimple *stmt = gsi_stmt (gsi2);
+ gimple_stmt_iterator gsi3 = gsi2;
+ gsi_next (&gsi2);
+ gsi_remove (&gsi3, false);
+ if (is_gimple_assign (stmt)
+ && arith_code_with_undefined_signed_overflow
+ (gimple_assign_rhs_code (stmt)))
+ gsi_insert_seq_before (&gsi,
+ rewrite_to_defined_overflow (stmt),
+ GSI_SAME_STMT);
+ else
+ gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
+ }
+ }
+ else
+ def = force_gimple_operand_gsi (&gsi, def, false, NULL_TREE,
+ true, GSI_SAME_STMT);
+
+ gassign *ass = gimple_build_assign (rslt, def);
+ gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
+ if (dump_file)
+ {
+ print_gimple_stmt (dump_file, ass, 0, 0);
+ fprintf (dump_file, "\n");
+ }
+ }
+}
+
/* Replace ssa names for that scev can prove they are constant by the
appropriate constants. Also perform final value replacement in loops,
in case the replacement expressions are cheap.
@@ -3430,8 +3555,7 @@ scev_const_prop (void)
basic_block bb;
tree name, type, ev;
gphi *phi;
- gassign *ass;
- struct loop *loop, *ex_loop;
+ struct loop *loop;
bitmap ssa_names_to_remove = NULL;
unsigned i;
gphi_iterator psi;
@@ -3507,126 +3631,8 @@ scev_const_prop (void)
/* Now the regular final value replacement. */
FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
- {
- edge exit;
- tree def, rslt, niter;
- gimple_stmt_iterator gsi;
-
- /* If we do not know exact number of iterations of the loop, we cannot
- replace the final value. */
- exit = single_exit (loop);
- if (!exit)
- continue;
-
- niter = number_of_latch_executions (loop);
- if (niter == chrec_dont_know)
- continue;
-
- /* Ensure that it is possible to insert new statements somewhere. */
- if (!single_pred_p (exit->dest))
- split_loop_exit_edge (exit);
- gsi = gsi_after_labels (exit->dest);
+ final_value_replacement_loop (loop);
- ex_loop = superloop_at_depth (loop,
- loop_depth (exit->dest->loop_father) + 1);
-
- for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); )
- {
- phi = psi.phi ();
- rslt = PHI_RESULT (phi);
- def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
- if (virtual_operand_p (def))
- {
- gsi_next (&psi);
- continue;
- }
-
- if (!POINTER_TYPE_P (TREE_TYPE (def))
- && !INTEGRAL_TYPE_P (TREE_TYPE (def)))
- {
- gsi_next (&psi);
- continue;
- }
-
- bool folded_casts;
- def = analyze_scalar_evolution_in_loop (ex_loop, loop, def,
- &folded_casts);
- def = compute_overall_effect_of_inner_loop (ex_loop, def);
- if (!tree_does_not_contain_chrecs (def)
- || chrec_contains_symbols_defined_in_loop (def, ex_loop->num)
- /* Moving the computation from the loop may prolong life range
- of some ssa names, which may cause problems if they appear
- on abnormal edges. */
- || contains_abnormal_ssa_name_p (def)
- /* Do not emit expensive expressions. The rationale is that
- when someone writes a code like
-
- while (n > 45) n -= 45;
-
- he probably knows that n is not large, and does not want it
- to be turned into n %= 45. */
- || expression_expensive_p (def))
- {
- if (dump_file && (dump_flags & TDF_DETAILS))
- {
- fprintf (dump_file, "not replacing:\n ");
- print_gimple_stmt (dump_file, phi, 0, 0);
- fprintf (dump_file, "\n");
- }
- gsi_next (&psi);
- continue;
- }
-
- /* Eliminate the PHI node and replace it by a computation outside
- the loop. */
- if (dump_file)
- {
- fprintf (dump_file, "\nfinal value replacement:\n ");
- print_gimple_stmt (dump_file, phi, 0, 0);
- fprintf (dump_file, " with\n ");
- }
- def = unshare_expr (def);
- remove_phi_node (&psi, false);
-
- /* If def's type has undefined overflow and there were folded
- casts, rewrite all stmts added for def into arithmetics
- with defined overflow behavior. */
- if (folded_casts && ANY_INTEGRAL_TYPE_P (TREE_TYPE (def))
- && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (def)))
- {
- gimple_seq stmts;
- gimple_stmt_iterator gsi2;
- def = force_gimple_operand (def, &stmts, true, NULL_TREE);
- gsi2 = gsi_start (stmts);
- while (!gsi_end_p (gsi2))
- {
- gimple *stmt = gsi_stmt (gsi2);
- gimple_stmt_iterator gsi3 = gsi2;
- gsi_next (&gsi2);
- gsi_remove (&gsi3, false);
- if (is_gimple_assign (stmt)
- && arith_code_with_undefined_signed_overflow
- (gimple_assign_rhs_code (stmt)))
- gsi_insert_seq_before (&gsi,
- rewrite_to_defined_overflow (stmt),
- GSI_SAME_STMT);
- else
- gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
- }
- }
- else
- def = force_gimple_operand_gsi (&gsi, def, false, NULL_TREE,
- true, GSI_SAME_STMT);
-
- ass = gimple_build_assign (rslt, def);
- gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
- if (dump_file)
- {
- print_gimple_stmt (dump_file, ass, 0, 0);
- fprintf (dump_file, "\n");
- }
- }
- }
return 0;
}
diff --git a/gcc/tree-scalar-evolution.h b/gcc/tree-scalar-evolution.h
index 6d31280..29c7cd4 100644
--- a/gcc/tree-scalar-evolution.h
+++ b/gcc/tree-scalar-evolution.h
@@ -33,6 +33,7 @@ extern tree analyze_scalar_evolution (struct loop *, tree);
extern tree instantiate_scev (basic_block, struct loop *, tree);
extern tree resolve_mixers (struct loop *, tree, bool *);
extern void gather_stats_on_scev_database (void);
+extern void final_value_replacement_loop (struct loop *);
extern unsigned int scev_const_prop (void);
extern bool expression_expensive_p (tree);
extern bool simple_iv (struct loop *, struct loop *, tree, struct affine_iv *,