Hello,
While testing a patch related to SMS I encountered a problem which this
patch tries to fix. The problematic case involves write after read to
the same memory location (see attached file sms-4.c). In this case we
have one anti-dep edge with distance 0 from node r (containing the read
operation) to node w (containing the write operation) and true dep edge
with distance 1 from node w to node r. Consider scheduling node r after
node w was already been scheduled and that node w is scheduled in the
start row of node r's scheduling window. In the current implemetation
we will add node w to node r's must_preceed bitmap although node w
must_follow node r in this case. Trying to avoid this we can use both
must_follow and must_preceed bitmaps for checking possible scheduling
columns in the start and end rows. To avoid the situation where a node
appears in both must_follow and must_preceed bitmaps the distance of
the edge is taken into account. I would appreciate comments regarding
this patch especially the part which tries to avoid situation where a
node appears in both must_follow and must_preceed bitmaps. My concern
is that there could be situation where for the above case (write after
read) w must_preceed r, so it might be that a more accurate calculation
is needed. This patch; together with another patch I mentioned at the
beginning where tested on ppc with no regressions so far.
Thanks,
Revital
(See attached file: sms-4.c.txt)(See attached file: patch_fix_order_12.txt)
/* Inspired from sbitmap_a_or_b_and_c_cg function in sbitmap.c. */
/* { dg-do run } */
/* { dg-options "-O2 -fmodulo-sched -fmodulo-sched-allow-regmoves" } */
extern void abort (void);
int a[5] = { 0, 1, 0, 0, 0 };
int b[5] = { 0, 1, 0, 1, 0 };
int c[5] = { 0, 0, 1, 1, 0 };
int dst[5] = { 0, 0, 0, 0, 0 };
void
foo (int size, int *ap, int *bp, int *cp, int *dstp)
{
unsigned int i, n = size;
int changed = 0;
for (i = 0; i < n; i++)
{
const int tmp = *ap++ | (*bp++ & *cp++);
changed |= *dstp ^ tmp;
*dstp++ = tmp;
}
if (changed == 0)
abort ();
}
int
main ()
{
foo (5, a, b, c, dst);
return 0;
}
Index: modulo-sched.c
===================================================================
--- modulo-sched.c (revision 129721)
+++ modulo-sched.c (working copy)
@@ -1518,6 +1518,52 @@
return 0;
}
+
+/* Calculate MUST_PRECEDE and MUST_FOLLOW bitmaps of U_NODE; which is
+ the node currently been scheduled; using START, END and STEP which are
+ the boundaries of U_NODE's scheduling window, II and SCHED_NODES which
+ is the list of already scheduled nodes. At the end of the calculation
+ MUST_PRECEDE (MUST_FOLLOW) should contain all already scheduled
+ predecessors (successors) of U_NODE that MUST_PRECEDE (MUST_FOLLOW) it
+ if they are all scheduled in the START or END row in order to preserve
+ the dependence between them. */
+
+static void
+calculate_must_precede_and_must_follow (ddg_node_ptr u_node, int start,
+ int end, int step, int ii,
+ sbitmap sched_nodes,
+ sbitmap must_precede,
+ sbitmap must_follow)
+{
+ ddg_edge_ptr e;
+ int start_row = SMODULO (start, ii);
+ int end_row = SMODULO ((end - step), ii);
+
+ for (e = u_node->in; e != 0; e = e->next_in)
+ if (TEST_BIT (sched_nodes, e->src->cuid)
+ && ((SMODULO (SCHED_TIME (e->src), ii) == start_row)
+ || (SMODULO (SCHED_TIME (e->src), ii) == end_row)))
+ SET_BIT (must_precede, e->src->cuid);
+
+ for (e = u_node->out; e != 0; e = e->next_out)
+ if (TEST_BIT (sched_nodes, e->dest->cuid)
+ && ((SMODULO (SCHED_TIME (e->dest), ii) == end_row)
+ || (SMODULO (SCHED_TIME (e->dest), ii) == start_row)))
+ {
+ SET_BIT (must_follow, e->dest->cuid);
+
+ if (TEST_BIT (must_precede, e->dest->cuid))
+ {
+ /* Avoid inserting the same node to both must_precede and
+ must_follow by considering edges with distance zero. */
+ if (e->distance == 0)
+ RESET_BIT (must_precede, e->dest->cuid);
+ else
+ RESET_BIT (must_follow, e->dest->cuid);
+ }
+ }
+}
+
/* This function implements the scheduling algorithm for SMS according to the
above algorithm. */
static partial_schedule_ptr
@@ -1527,7 +1573,6 @@
int i, c, success, num_splits = 0;
int flush_and_start_over = true;
int num_nodes = g->num_nodes;
- ddg_edge_ptr e;
ps_insn_ptr psi;
int start, end, step; /* Place together into one struct? */
sbitmap sched_nodes = sbitmap_alloc (num_nodes);
@@ -1590,18 +1635,11 @@
both scheduled in the same row. The current check is less
conservative and content with the fact that both U and the
insn are scheduled in the same row. */
- for (e = u_node->in; e != 0; e = e->next_in)
- if (TEST_BIT (sched_nodes, e->src->cuid)
- && (SMODULO (SCHED_TIME (e->src), ii) ==
- SMODULO (start, ii)))
- SET_BIT (must_precede, e->src->cuid);
+ calculate_must_precede_and_must_follow (u_node, start, end,
+ step, ii, sched_nodes,
+ must_precede,
+ must_follow);
- for (e = u_node->out; e != 0; e = e->next_out)
- if (TEST_BIT (sched_nodes, e->dest->cuid)
- && (SMODULO (SCHED_TIME (e->dest), ii) ==
- SMODULO ((end - step), ii)))
- SET_BIT (must_follow, e->dest->cuid);
-
gcc_assert ((step > 0 && start < end)
|| (step < 0 && start > end));