http://gcc.gnu.org/bugzilla/show_bug.cgi?id=45352

--- Comment #7 from Andrey Belevantsev <abel at gcc dot gnu.org> 2010-09-28 
11:47:33 UTC ---
Thanks for the test, it shows one more case of using issue_rate that I have
missed.  We need to distinguish between the stalls caused by data dependencies
and by lack of functional units, i.e. DFA related stalls.  This is because when
resetting the cycles, we know that the latter should remain as is (which is
actually is checked by the failing assert) but not the former.  Fixed by
allowing need_stall to be negative and handling this situation in
stall_for_cycles.  This again fixes all tests with all option combinations.

Zdenek, if you can, please test this in your environment, and I will check ia64
and possibly ppc/ppc64, because with this kind of patch I need to make sure
that other (honest) targets are unaffected.


diff --git a/gcc/sel-sched.c b/gcc/sel-sched.c
index 041c471..5bf0e19 100644
--- a/gcc/sel-sched.c
+++ b/gcc/sel-sched.c
@@ -4051,10 +4051,11 @@ sel_dfa_new_cycle (insn_t insn, fence_t fence)
 /* Invoke reorder* target hooks on the ready list.  Return the number of insns
    we can issue.  FENCE is the current fence.  */
 static int
-invoke_reorder_hooks (fence_t fence)
+invoke_reorder_hooks (fence_t fence, bool *pran_hook)
 {
   int issue_more;
-  bool ran_hook = false;
+
+  *pran_hook = false;

   /* Call the reorder hook at the beginning of the cycle, and call
      the reorder2 hook in the middle of the cycle.  */
@@ -4077,7 +4078,7 @@ invoke_reorder_hooks (fence_t fence)
           if (pipelining_p)
             ++ready.n_ready;

-          ran_hook = true;
+          *pran_hook = true;
         }
       else
         /* Initialize can_issue_more for variable_issue.  */
@@ -4106,14 +4107,14 @@ invoke_reorder_hooks (fence_t fence)
             ++ready.n_ready;
         }

-      ran_hook = true;
+      *pran_hook = true;
     }
   else
     issue_more = FENCE_ISSUE_MORE (fence);

   /* Ensure that ready list and vec_av_set are in line with each other,
      i.e. vec_av_set[i] == ready_element (&ready, i).  */
-  if (issue_more && ran_hook)
+  if (issue_more && *pran_hook)
     {
       int i, j, n;
       rtx *arr = ready.vec;
@@ -4313,7 +4314,7 @@ get_expr_cost (expr_t expr, fence_t fence)
 /* Find the best insn for scheduling, either via max_issue or just take
    the most prioritized available.  */
 static int
-choose_best_insn (fence_t fence, int privileged_n, int *index)
+choose_best_insn (fence_t fence, int privileged_n, bool ran_hook, int *index)
 {
   int can_issue = 0;

@@ -4338,6 +4339,8 @@ choose_best_insn (fence_t fence, int privileged_n, int
*index)
       if (get_expr_cost (expr, fence) < 1)
         {
           can_issue = can_issue_more;
+          if (!ran_hook && !can_issue)
+        can_issue = 1;
           *index = i;

           if (sched_verbose >= 2)
@@ -4360,12 +4363,15 @@ choose_best_insn (fence_t fence, int privileged_n, int
*index)
 /* Choose the best expr from *AV_VLIW_PTR and a suitable register for it.
    BNDS and FENCE are current boundaries and scheduling fence respectively.
    Return the expr found and NULL if nothing can be issued atm.
-   Write to PNEED_STALL the number of cycles to stall if no expr was found. 
*/
+   Write to PNEED_STALL the number of cycles to stall if no expr was found.
+   The positive number of cycles means a data dependency stall, while
+   the negative one means a functional stall (DFA stall).  */
 static expr_t
 find_best_expr (av_set_t *av_vliw_ptr, blist_t bnds, fence_t fence,
                 int *pneed_stall)
 {
   expr_t best;
+  bool ran_hook;

   /* Choose the best insn for scheduling via:
      1) sorting the ready list based on priority;
@@ -4376,8 +4382,8 @@ find_best_expr (av_set_t *av_vliw_ptr, blist_t bnds,
fence_t fence,
     {
       int privileged_n, index;

-      can_issue_more = invoke_reorder_hooks (fence);
-      if (can_issue_more > 0)
+      can_issue_more = invoke_reorder_hooks (fence, &ran_hook);
+      if (can_issue_more > 0 || !ran_hook)
         {
           /* Try choosing the best insn until we find one that is could be
              scheduled due to liveness restrictions on its destination
register.
@@ -4385,7 +4391,7 @@ find_best_expr (av_set_t *av_vliw_ptr, blist_t bnds,
fence_t fence,
              in the order of their priority.  */
           invoke_dfa_lookahead_guard ();
           privileged_n = calculate_privileged_insns ();
-          can_issue_more = choose_best_insn (fence, privileged_n, &index);
+          can_issue_more = choose_best_insn (fence, privileged_n, ran_hook,
&index);
           if (can_issue_more)
             best = find_expr_for_ready (index, true);
         }
@@ -4394,7 +4400,7 @@ find_best_expr (av_set_t *av_vliw_ptr, blist_t bnds,
fence_t fence,
       if (can_issue_more == 0)
         {
           best = NULL;
-          *pneed_stall = 1;
+          *pneed_stall = -1;
         }
     }

@@ -4402,8 +4408,9 @@ find_best_expr (av_set_t *av_vliw_ptr, blist_t bnds,
fence_t fence,
     {
       can_issue_more = invoke_aftermath_hooks (fence, EXPR_INSN_RTX (best),
                                                can_issue_more);
-      if (can_issue_more == 0)
-        *pneed_stall = 1;
+      if (targetm.sched.variable_issue
+      && can_issue_more == 0)
+        *pneed_stall = -1;
     }

   if (sched_verbose >= 2)
@@ -5471,13 +5478,19 @@ schedule_expr_on_boundary (bnd_t bnd, expr_t expr_vliw,
int seqno)
   return insn;
 }

-/* Stall for N cycles on FENCE.  */
+/* Stall for N cycles on FENCE.  N > 0 means we want a stall because
+   of an unsatisfied data dependence, N < 0 means the DFA stall.
+   The difference is that we need to set the AFTER_STALL_P bit only
+   for a data dependence stall.  */
 static void
 stall_for_cycles (fence_t fence, int n)
 {
   int could_more;

-  could_more = n > 1 || FENCE_ISSUED_INSNS (fence) < issue_rate;
+  if (n > 0)
+    could_more = 1;
+  else
+    n = -n;
   while (n--)
     advance_one_cycle (fence);
   if (could_more)
@@ -5510,7 +5523,7 @@ fill_insns (fence_t fence, int seqno, ilist_t
**scheduled_insns_tailpp)
       blist_t *bnds_tailp1, *bndsp;
       expr_t expr_vliw;
       int need_stall;
-      int was_stall = 0, scheduled_insns = 0, stall_iterations = 0;
+      int was_stall = 0, scheduled_insns = 0;
       int max_insns = pipelining_p ? issue_rate : 2 * issue_rate;
       int max_stall = pipelining_p ? 1 : 3;
       bool last_insn_was_debug = false;
@@ -5529,17 +5542,16 @@ fill_insns (fence_t fence, int seqno, ilist_t
**scheduled_insns_tailpp)
       do
         {
           expr_vliw = find_best_expr (&av_vliw, bnds, fence, &need_stall);
-          if (!expr_vliw && need_stall)
+          if (! expr_vliw && need_stall != 0)
             {
               /* All expressions required a stall.  Do not recompute av sets
                  as we'll get the same answer (modulo the insns between
                  the fence and its boundary, which will not be available for
-                 pipelining).  */
-              gcc_assert (! expr_vliw && stall_iterations < 2);
-              was_stall++;
-          /* If we are going to stall for too long, break to recompute av
+                 pipelining).
+         If we are going to stall for too long, break to recompute av
          sets and bring more insns for pipelining.  */
-          if (need_stall <= 3)
+              was_stall++;
+          if (need_stall < 0 || need_stall <= 3)
         stall_for_cycles (fence, need_stall);
           else
         {
@@ -5548,7 +5560,7 @@ fill_insns (fence_t fence, int seqno, ilist_t
**scheduled_insns_tailpp)
         }
             }
         }
-      while (! expr_vliw && need_stall);
+      while (! expr_vliw && need_stall != 0);

       /* Now either we've selected expr_vliw or we have nothing to schedule. 
*/
       if (!expr_vliw)
@@ -7046,6 +7058,8 @@ reset_sched_cycles_in_current_ebb (void)
         }

       haifa_clock += i;
+          if (sched_verbose >= 2)
+            sel_print ("haifa clock: %d\n", haifa_clock);
     }
       else
     gcc_assert (haifa_cost == 0);
@@ -7064,6 +7078,7 @@ reset_sched_cycles_in_current_ebb (void)
               {
                 sel_print ("advance_state (dfa_new_cycle)\n");
                 debug_state (curr_state);
+        sel_print ("haifa clock: %d\n", haifa_clock + 1);
               }
           }

@@ -7072,8 +7087,11 @@ reset_sched_cycles_in_current_ebb (void)
       cost = state_transition (curr_state, insn);

           if (sched_verbose >= 2)
-            debug_state (curr_state);
-
+        {
+          sel_print ("scheduled insn %d, clock %d\n", INSN_UID (insn),
+             haifa_clock + 1);
+              debug_state (curr_state);
+        }
       gcc_assert (cost < 0);
     }

Reply via email to