http://gcc.gnu.org/bugzilla/show_bug.cgi?id=45352
--- Comment #7 from Andrey Belevantsev <abel at gcc dot gnu.org> 2010-09-28 11:47:33 UTC --- Thanks for the test, it shows one more case of using issue_rate that I have missed. We need to distinguish between the stalls caused by data dependencies and by lack of functional units, i.e. DFA related stalls. This is because when resetting the cycles, we know that the latter should remain as is (which is actually checked by the failing assert) but not the former. Fixed by allowing need_stall to be negative and handling this situation in stall_for_cycles. This again fixes all tests with all option combinations. Zdenek, if you can, please test this in your environment, and I will check ia64 and possibly ppc/ppc64, because with this kind of patch I need to make sure that other (honest) targets are unaffected. diff --git a/gcc/sel-sched.c b/gcc/sel-sched.c index 041c471..5bf0e19 100644 --- a/gcc/sel-sched.c +++ b/gcc/sel-sched.c @@ -4051,10 +4051,11 @@ sel_dfa_new_cycle (insn_t insn, fence_t fence) /* Invoke reorder* target hooks on the ready list. Return the number of insns we can issue. FENCE is the current fence. */ static int -invoke_reorder_hooks (fence_t fence) +invoke_reorder_hooks (fence_t fence, bool *pran_hook) { int issue_more; - bool ran_hook = false; + + *pran_hook = false; /* Call the reorder hook at the beginning of the cycle, and call the reorder2 hook in the middle of the cycle. */ @@ -4077,7 +4078,7 @@ invoke_reorder_hooks (fence_t fence) if (pipelining_p) ++ready.n_ready; - ran_hook = true; + *pran_hook = true; } else /* Initialize can_issue_more for variable_issue. */ @@ -4106,14 +4107,14 @@ invoke_reorder_hooks (fence_t fence) ++ready.n_ready; } - ran_hook = true; + *pran_hook = true; } else issue_more = FENCE_ISSUE_MORE (fence); /* Ensure that ready list and vec_av_set are in line with each other, i.e. vec_av_set[i] == ready_element (&ready, i). 
*/ - if (issue_more && ran_hook) + if (issue_more && *pran_hook) { int i, j, n; rtx *arr = ready.vec; @@ -4313,7 +4314,7 @@ get_expr_cost (expr_t expr, fence_t fence) /* Find the best insn for scheduling, either via max_issue or just take the most prioritized available. */ static int -choose_best_insn (fence_t fence, int privileged_n, int *index) +choose_best_insn (fence_t fence, int privileged_n, bool ran_hook, int *index) { int can_issue = 0; @@ -4338,6 +4339,8 @@ choose_best_insn (fence_t fence, int privileged_n, int *index) if (get_expr_cost (expr, fence) < 1) { can_issue = can_issue_more; + if (!ran_hook && !can_issue) + can_issue = 1; *index = i; if (sched_verbose >= 2) @@ -4360,12 +4363,15 @@ choose_best_insn (fence_t fence, int privileged_n, int *index) /* Choose the best expr from *AV_VLIW_PTR and a suitable register for it. BNDS and FENCE are current boundaries and scheduling fence respectively. Return the expr found and NULL if nothing can be issued atm. - Write to PNEED_STALL the number of cycles to stall if no expr was found. */ + Write to PNEED_STALL the number of cycles to stall if no expr was found. + The positive number of cycles means a data dependency stall, while + the negative one means a functional stall (DFA stall). */ static expr_t find_best_expr (av_set_t *av_vliw_ptr, blist_t bnds, fence_t fence, int *pneed_stall) { expr_t best; + bool ran_hook; /* Choose the best insn for scheduling via: 1) sorting the ready list based on priority; @@ -4376,8 +4382,8 @@ find_best_expr (av_set_t *av_vliw_ptr, blist_t bnds, fence_t fence, { int privileged_n, index; - can_issue_more = invoke_reorder_hooks (fence); - if (can_issue_more > 0) + can_issue_more = invoke_reorder_hooks (fence, &ran_hook); + if (can_issue_more > 0 || !ran_hook) { /* Try choosing the best insn until we find one that is could be scheduled due to liveness restrictions on its destination register. 
@@ -4385,7 +4391,7 @@ find_best_expr (av_set_t *av_vliw_ptr, blist_t bnds, fence_t fence, in the order of their priority. */ invoke_dfa_lookahead_guard (); privileged_n = calculate_privileged_insns (); - can_issue_more = choose_best_insn (fence, privileged_n, &index); + can_issue_more = choose_best_insn (fence, privileged_n, ran_hook, &index); if (can_issue_more) best = find_expr_for_ready (index, true); } @@ -4394,7 +4400,7 @@ find_best_expr (av_set_t *av_vliw_ptr, blist_t bnds, fence_t fence, if (can_issue_more == 0) { best = NULL; - *pneed_stall = 1; + *pneed_stall = -1; } } @@ -4402,8 +4408,9 @@ find_best_expr (av_set_t *av_vliw_ptr, blist_t bnds, fence_t fence, { can_issue_more = invoke_aftermath_hooks (fence, EXPR_INSN_RTX (best), can_issue_more); - if (can_issue_more == 0) - *pneed_stall = 1; + if (targetm.sched.variable_issue + && can_issue_more == 0) + *pneed_stall = -1; } if (sched_verbose >= 2) @@ -5471,13 +5478,19 @@ schedule_expr_on_boundary (bnd_t bnd, expr_t expr_vliw, int seqno) return insn; } -/* Stall for N cycles on FENCE. */ +/* Stall for N cycles on FENCE. N > 0 means we want a stall because + of an unsatisfied data dependence, N < 0 means the DFA stall. + The difference is that we need to set the AFTER_STALL_P bit only + for a data dependence stall. */ static void stall_for_cycles (fence_t fence, int n) { int could_more; - could_more = n > 1 || FENCE_ISSUED_INSNS (fence) < issue_rate; + if (n > 0) + could_more = 1; + else + n = -n; while (n--) advance_one_cycle (fence); if (could_more) @@ -5510,7 +5523,7 @@ fill_insns (fence_t fence, int seqno, ilist_t **scheduled_insns_tailpp) blist_t *bnds_tailp1, *bndsp; expr_t expr_vliw; int need_stall; - int was_stall = 0, scheduled_insns = 0, stall_iterations = 0; + int was_stall = 0, scheduled_insns = 0; int max_insns = pipelining_p ? issue_rate : 2 * issue_rate; int max_stall = pipelining_p ? 
1 : 3; bool last_insn_was_debug = false; @@ -5529,17 +5542,16 @@ fill_insns (fence_t fence, int seqno, ilist_t **scheduled_insns_tailpp) do { expr_vliw = find_best_expr (&av_vliw, bnds, fence, &need_stall); - if (!expr_vliw && need_stall) + if (! expr_vliw && need_stall != 0) { /* All expressions required a stall. Do not recompute av sets as we'll get the same answer (modulo the insns between the fence and its boundary, which will not be available for - pipelining). */ - gcc_assert (! expr_vliw && stall_iterations < 2); - was_stall++; - /* If we are going to stall for too long, break to recompute av + pipelining). + If we are going to stall for too long, break to recompute av sets and bring more insns for pipelining. */ - if (need_stall <= 3) + was_stall++; + if (need_stall < 0 || need_stall <= 3) stall_for_cycles (fence, need_stall); else { @@ -5548,7 +5560,7 @@ fill_insns (fence_t fence, int seqno, ilist_t **scheduled_insns_tailpp) } } } - while (! expr_vliw && need_stall); + while (! expr_vliw && need_stall != 0); /* Now either we've selected expr_vliw or we have nothing to schedule. */ if (!expr_vliw) @@ -7046,6 +7058,8 @@ reset_sched_cycles_in_current_ebb (void) } haifa_clock += i; + if (sched_verbose >= 2) + sel_print ("haifa clock: %d\n", haifa_clock); } else gcc_assert (haifa_cost == 0); @@ -7064,6 +7078,7 @@ reset_sched_cycles_in_current_ebb (void) { sel_print ("advance_state (dfa_new_cycle)\n"); debug_state (curr_state); + sel_print ("haifa clock: %d\n", haifa_clock + 1); } } @@ -7072,8 +7087,11 @@ reset_sched_cycles_in_current_ebb (void) cost = state_transition (curr_state, insn); if (sched_verbose >= 2) - debug_state (curr_state); - + { + sel_print ("scheduled insn %d, clock %d\n", INSN_UID (insn), + haifa_clock + 1); + debug_state (curr_state); + } gcc_assert (cost < 0); }