Hi,
This simple patch makes interchange even more conservative for small loops with
constant-initialized simple reductions.
The reason is that undoing such a reduction introduces a new data reference and
a cond_expr, which could cost too much in a small
loop.
Test gcc.target/aarch64/pr62178.c is fixed with this patch. Is it OK if the
tests pass?
Thanks,
bin
2017-12-08 Bin Cheng <bin.ch...@arm.com>
* gimple-loop-interchange.cc (struct loop_cand): New field.
(loop_cand::loop_cand): Init new field in constructor.
(loop_cand::classify_simple_reduction): Record simple reduction
initialized with constant value.
(should_interchange_loops): New parameter. Skip interchange if loop
has few data references and constant initialized simple reduction.
(tree_loop_interchange::interchange): Update call to above function.
(should_interchange_loop_nest): Ditto.
diff --git a/gcc/gimple-loop-interchange.cc b/gcc/gimple-loop-interchange.cc
index 6554a42..f45f7dc 100644
--- a/gcc/gimple-loop-interchange.cc
+++ b/gcc/gimple-loop-interchange.cc
@@ -199,13 +199,16 @@ struct loop_cand
edge m_exit;
/* Basic blocks of this loop. */
basic_block *m_bbs;
+ /* Number of constant initialized simple reduction. */
+ unsigned m_num_const_init_simple_reduc;
};
/* Constructor. */
loop_cand::loop_cand (struct loop *loop, struct loop *outer)
: m_loop (loop), m_outer (outer),
- m_exit (single_exit (loop)), m_bbs (get_loop_body (loop))
+ m_exit (single_exit (loop)), m_bbs (get_loop_body (loop)),
+ m_num_const_init_simple_reduc (0)
{
m_inductions.create (3);
m_reductions.create (3);
@@ -440,7 +443,9 @@ loop_cand::classify_simple_reduction (reduction_p re)
re->init_ref = gimple_assign_rhs1 (producer);
}
- else if (!CONSTANT_CLASS_P (re->init))
+ else if (CONSTANT_CLASS_P (re->init))
+ m_num_const_init_simple_reduc++;
+ else
return;
/* Check how reduction variable is used. */
@@ -1422,6 +1427,7 @@ dump_access_strides (vec<data_reference_p> datarefs)
static bool
should_interchange_loops (unsigned i_idx, unsigned o_idx,
vec<data_reference_p> datarefs,
+ unsigned num_const_init_simple_reduc,
bool innermost_loops_p, bool dump_info_p = true)
{
unsigned HOST_WIDE_INT ratio;
@@ -1522,6 +1528,12 @@ should_interchange_loops (unsigned i_idx, unsigned o_idx,
if (num_unresolved_drs != 0 || num_resolved_not_ok_drs != 0)
return false;
+ /* Conservatively skip interchange in cases only have few data references
+ and constant initialized simple reduction since it introduces new data
+ reference as well as ?: operation. */
+ if (num_old_inv_drs + num_const_init_simple_reduc * 2 >= datarefs.length ())
+ return false;
+
/* We use different stride comparison ratio for interchanging innermost
two loops or not. The idea is to be conservative in interchange for
the innermost loops. */
@@ -1576,6 +1588,7 @@ tree_loop_interchange::interchange (vec<data_reference_p>
datarefs,
/* Check profitability for loop interchange. */
if (should_interchange_loops (i_idx, o_idx, datarefs,
+ iloop.m_num_const_init_simple_reduc,
iloop.m_loop->inner == NULL))
{
if (dump_file && (dump_flags & TDF_DETAILS))
@@ -1764,7 +1779,7 @@ should_interchange_loop_nest (struct loop *loop_nest,
struct loop *innermost,
/* Check if any two adjacent loops should be interchanged. */
for (struct loop *loop = innermost;
loop != loop_nest; loop = loop_outer (loop), idx--)
- if (should_interchange_loops (idx, idx - 1, datarefs,
+ if (should_interchange_loops (idx, idx - 1, datarefs, 0,
loop == innermost, false))
return true;