On 03/21/2018 04:43 PM, Richard Biener wrote:
On Wed, 21 Mar 2018, Tom de Vries wrote:
On 03/12/2018 01:14 PM, Richard Biener wrote:
On Thu, 22 Feb 2018, Tom de Vries wrote:
Hi,
this patch fixes an ICE in the parloops pass.
The ICE (when compiling the test-case in attached patch) follows from the
fact
that here in gen_parallel_loop the call to canonicalize_loop_ivs fails to
"base all the induction variables in LOOP on a single control one":
...
/* Base all the induction variables in LOOP on a single control one.*/
canonicalize_loop_ivs (loop, &nit, true);
...
This is caused by the fact that for one of the induction variables,
simple_iv
no longer returns true (as was the case at the start of
gen_parallel_loop).
This seems to be triggered by the fact that during loop_version we call
scev_reset (although I'm not sure why when recalculating scev info we're
not
reaching the same conclusion as before).
I guess the real reason is that canonicalize_loop_ivs calls
create_iv first which in the parloop case with bump-in-latch true
and then incrementally re-writes PHIs, eventually wrecking other
PHIs SCEV. In this case the 2nd PHIs evolution depends on the
first one but the rewritten ones depend on the new IV already.
So the better fix (maybe not 100% enough) would be to re-organize
canonicalize_loop_ivs to first do analysis of all PHIs and then
rewrite them all.
Focusing on the re-organize first, is this sort of what you had in mind?
Yes, though not sure why you need to have a hash-map here, just pushing
to a vector of PHIs would work, no? Or alternatively a bitmap
of SSA versions so you have a way to match the gphi iterator with
the set of IVs. But the vector should be sorted in PHI order so
just traversing the PHIs looking for the "next" PHI in the vector
would work I guess.
Does it help with the original issue btw?
Not as such.
The original problem happens before canonicalize_loop_ivs.
Perhaps the easiest way to demonstrate the problem is this patch:
...
diff --git a/gcc/tree-parloops.c b/gcc/tree-parloops.c
index 3a788cc..801fc46 100644
--- a/gcc/tree-parloops.c
+++ b/gcc/tree-parloops.c
@@ -2254,6 +2254,38 @@ num_phis (basic_block bb, bool count_virtual_p)
return nr_phis;
}
+static unsigned
+get_nr_simple_ivs (struct loop *loop)
+{
+ unsigned cnt = 0;
+ basic_block *bbs = get_loop_body_in_dom_order (loop);
+
+ for (unsigned i = 0; i < loop->num_nodes; i++)
+ {
+ basic_block bb = bbs[i];
+ if (bb->loop_father != loop)
+ continue;
+
+ for (gphi_iterator psi = gsi_start_phis (bb); !gsi_end_p (psi);
+ gsi_next (&psi))
+ {
+ gphi *phi = psi.phi ();
+ tree res = PHI_RESULT (phi);
+ if (virtual_operand_p (res))
+ continue;
+
+ affine_iv iv;
+ if (!simple_iv (loop, loop, res, &iv, true))
+ continue;
+ cnt++;
+ }
+ }
+
+ free (bbs);
+
+ return cnt;
+}
+
/* Generates code to execute the iterations of LOOP in N_THREADS
threads in parallel, which can be 0 if that number is to be determined
later.
@@ -2378,11 +2410,15 @@ gen_parallel_loop (struct loop *loop,
initialize_original_copy_tables ();
/* We assume that the loop usually iterates a lot. */
+ fprintf (stderr, "nr simple ivs: %u @ line %d\n",
+ get_nr_simple_ivs (loop), __LINE__);
loop_version (loop, many_iterations_cond, NULL,
profile_probability::likely (),
profile_probability::unlikely (),
profile_probability::likely (),
profile_probability::unlikely (), true);
+ fprintf (stderr, "nr simple ivs: %u @ line %d\n",
+ get_nr_simple_ivs (loop), __LINE__);
update_ssa (TODO_update_ssa);
free_original_copy_tables ();
}
...
and this output for pr83126.c:
...
nr simple ivs: 3 @ line 2414
nr simple ivs: 2 @ line 2421
...
I've written attached follow-up patch that works around that problem by
analyzing the phis before calling loop_version. It works for the
example, and bootstraps fine, but I'm not familiar enough with
loop_version to known whether this is safe or not.
Thanks,
- Tom
[parloops] Fix canonicalize_loop_ivs in gen_parallel_loop
2018-03-21 Tom de Vries <t...@codesourcery.com>
PR tree-optimization/83126
* tree-parloops.c (gen_parallel_loop): Inline canonicalize_loop_ivs.
Assert that canonicalize_loop_ivs succeeds.
* tree-ssa-loop-manip.c (get_all_phi_affine_ivs): Remove static.
(struct phi_affine_iv): Move ...
* tree-ssa-loop-manip.h (struct phi_affine_iv): ... here.
(get_all_phi_affine_ivs, canonicalize_loop_ivs_1): Declare.
---
gcc/tree-parloops.c | 27 ++++++---------------------
gcc/tree-ssa-loop-manip.c | 8 +-------
gcc/tree-ssa-loop-manip.h | 10 ++++++++++
3 files changed, 17 insertions(+), 28 deletions(-)
diff --git a/gcc/tree-parloops.c b/gcc/tree-parloops.c
index 3a788cc..9c2d19a 100644
--- a/gcc/tree-parloops.c
+++ b/gcc/tree-parloops.c
@@ -2339,6 +2339,9 @@ gen_parallel_loop (struct loop *loop,
we should compute nit = n * m, not nit = n.
Also may_be_zero handling would need to be adjusted. */
+ auto_vec<struct phi_affine_iv> simple_ivs;
+ get_all_phi_affine_ivs (loop, &simple_ivs);
+
type = TREE_TYPE (niter->niter);
nit = force_gimple_operand (unshare_expr (niter->niter), &stmts, true,
NULL_TREE);
@@ -2388,27 +2391,9 @@ gen_parallel_loop (struct loop *loop,
}
/* Base all the induction variables in LOOP on a single control one. */
- canonicalize_loop_ivs (loop, &nit, true);
- if (num_phis (loop->header, false) != reduction_list->elements () + 1)
- {
- /* The call to canonicalize_loop_ivs above failed to "base all the
- induction variables in LOOP on a single control one". Do damage
- control. */
- basic_block preheader = loop_preheader_edge (loop)->src;
- basic_block cond_bb = single_pred (preheader);
- gcond *cond = as_a <gcond *> (gsi_stmt (gsi_last_bb (cond_bb)));
- gimple_cond_make_true (cond);
- update_stmt (cond);
- /* We've gotten rid of the duplicate loop created by loop_version, but
- we can't undo whatever canonicalize_loop_ivs has done.
- TODO: Fix this properly by ensuring that the call to
- canonicalize_loop_ivs succeeds. */
- if (dump_file
- && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "canonicalize_loop_ivs failed for loop %d,"
- " aborting transformation\n", loop->num);
- return;
- }
+ canonicalize_loop_ivs_1 (loop, &nit, true, &simple_ivs);
+ gcc_assert (num_phis (loop->header, false)
+ == reduction_list->elements () + 1);
/* Ensure that the exit condition is the first statement in the loop.
The common case is that latch of the loop is empty (apart from the
diff --git a/gcc/tree-ssa-loop-manip.c b/gcc/tree-ssa-loop-manip.c
index ecda325..6014384 100644
--- a/gcc/tree-ssa-loop-manip.c
+++ b/gcc/tree-ssa-loop-manip.c
@@ -1453,15 +1453,9 @@ rewrite_phi_with_iv (gphi_iterator *psi,
gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
}
-struct phi_affine_iv
-{
- gphi *phi;
- affine_iv iv;
-};
-
/* Return map of phi node result to affine_iv, for all phis in LOOPS. */
-static void
+void
get_all_phi_affine_ivs (struct loop *loop,
auto_vec<struct phi_affine_iv> *simple_ivs)
{
diff --git a/gcc/tree-ssa-loop-manip.h b/gcc/tree-ssa-loop-manip.h
index 390ac6f..ef439be 100644
--- a/gcc/tree-ssa-loop-manip.h
+++ b/gcc/tree-ssa-loop-manip.h
@@ -19,6 +19,7 @@ along with GCC; see the file COPYING3. If not see
#ifndef GCC_TREE_SSA_LOOP_MANIP_H
#define GCC_TREE_SSA_LOOP_MANIP_H
+#include "tree-ssa-loop.h"
typedef void (*transform_callback)(struct loop *, void *);
@@ -56,6 +57,15 @@ extern void tree_unroll_loop (struct loop *, unsigned,
edge, struct tree_niter_desc *);
extern tree canonicalize_loop_ivs (struct loop *, tree *, bool);
+struct phi_affine_iv
+{
+ gphi *phi;
+ affine_iv iv;
+};
+extern void get_all_phi_affine_ivs (struct loop *,
+ auto_vec<struct phi_affine_iv> *);
+extern tree canonicalize_loop_ivs_1 (struct loop *, tree *, bool,
+ auto_vec<struct phi_affine_iv> *);
#endif /* GCC_TREE_SSA_LOOP_MANIP_H */