Hi,
For now, loop distribution treats any variable used outside of the loop as a
reduction. This is inaccurate because every partition contains the statements
defining induction variables. Ideally we should factor out scev-propagation as
a standalone interface which can be called when necessary. Until then, this
patch simply works around the reduction issue by checking whether the statement
belongs to all partitions. If it does, the reduction must be computed in the
last partition no matter how the loop is distributed.
Bootstrapped and tested on x86_64 and AArch64. Is it OK?
Thanks,
bin
2017-06-07 Bin Cheng <bin.ch...@arm.com>
* tree-loop-distribution.c (classify_partition): New parameter and
better handle reduction statement.
(rdg_build_partitions): New parameter and record statements belonging
to all partitions.
(distribute_loop): Update use of above functions.
From 51764e6a377cf21ef13ffc36928c9f2b8932aac2 Mon Sep 17 00:00:00 2001
From: Bin Cheng <binch...@e108451-lin.cambridge.arm.com>
Date: Fri, 9 Jun 2017 13:21:07 +0100
Subject: [PATCH 12/14] reduction-workaround-20170607.txt
---
gcc/tree-loop-distribution.c | 40 +++++++++++++++++++++++++++-------------
1 file changed, 27 insertions(+), 13 deletions(-)
diff --git a/gcc/tree-loop-distribution.c b/gcc/tree-loop-distribution.c
index 7e31fee8..167155e 100644
--- a/gcc/tree-loop-distribution.c
+++ b/gcc/tree-loop-distribution.c
@@ -1276,17 +1276,18 @@ build_rdg_partition_for_vertex (struct graph *rdg, int
v)
}
/* Classifies the builtin kind we can generate for PARTITION of RDG and LOOP.
- For the moment we detect only the memset zero pattern. */
+ For the moment we detect memset, memcpy and memmove patterns. Bitmap
+ STMT_IN_ALL_PARTITIONS contains statements belonging to all partitions. */
static void
-classify_partition (loop_p loop, struct graph *rdg, partition *partition)
+classify_partition (loop_p loop, struct graph *rdg, partition *partition,
+ bitmap stmt_in_all_partitions)
{
bitmap_iterator bi;
unsigned i;
tree nb_iter;
data_reference_p single_load, single_store;
- bool volatiles_p = false;
- bool plus_one = false;
+ bool volatiles_p = false, plus_one = false, has_reduction = false;
partition->kind = PKIND_NORMAL;
partition->main_dr = NULL;
@@ -1301,16 +1302,24 @@ classify_partition (loop_p loop, struct graph *rdg,
partition *partition)
if (gimple_has_volatile_ops (stmt))
volatiles_p = true;
- /* If the stmt has uses outside of the loop mark it as reduction. */
+ /* If the stmt is not included in all partitions and there are uses
+ outside of the loop, then mark the partition as a reduction. */
if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
{
- partition->reduction_p = true;
- return;
+ if (!bitmap_bit_p (stmt_in_all_partitions, i))
+ {
+ partition->reduction_p = true;
+ return;
+ }
+ has_reduction = true;
}
}
/* Perform general partition disqualification for builtins. */
if (volatiles_p
+ /* Simple workaround to prevent classifying the partition as builtin
+ if it contains any use outside of loop. */
+ || has_reduction
|| !flag_tree_loop_distribute_patterns)
return;
@@ -1540,14 +1549,16 @@ share_memory_accesses (struct graph *rdg,
return false;
}
-/* Aggregate several components into a useful partition that is
- registered in the PARTITIONS vector. Partitions will be
- distributed in different loops. */
+/* For each seed statement in STARTING_STMTS, this function builds a
+ partition for it by adding the statements it depends on according to RDG.
+ All partitions are recorded in PARTITIONS. Statements belonging
+ to all partitions are recorded in STMT_IN_ALL_PARTITIONS. */
static void
rdg_build_partitions (struct graph *rdg,
vec<gimple *> starting_stmts,
- vec<partition *> *partitions)
+ vec<partition *> *partitions,
+ bitmap stmt_in_all_partitions)
{
auto_bitmap processed;
int i;
@@ -1568,6 +1579,7 @@ rdg_build_partitions (struct graph *rdg,
partition *partition = build_rdg_partition_for_vertex (rdg, v);
bitmap_ior_into (processed, partition->stmts);
+ bitmap_and_into (stmt_in_all_partitions, partition->stmts);
if (dump_file && (dump_flags & TDF_DETAILS))
{
@@ -1814,13 +1826,15 @@ distribute_loop (struct loop *loop, vec<gimple *> stmts,
ddrs_vec = new vec<ddr_p> ();
ddrs_table = new hash_table<ddr_entry_hasher> (389);
+ auto_bitmap stmt_in_all_partitions;
auto_vec<struct partition *, 3> partitions;
- rdg_build_partitions (rdg, stmts, &partitions);
+ bitmap_set_range (stmt_in_all_partitions, 0, rdg->n_vertices);
+ rdg_build_partitions (rdg, stmts, &partitions, stmt_in_all_partitions);
any_builtin = false;
FOR_EACH_VEC_ELT (partitions, i, partition)
{
- classify_partition (loop, rdg, partition);
+ classify_partition (loop, rdg, partition, stmt_in_all_partitions);
any_builtin |= partition_builtin_p (partition);
}
--
1.9.1