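This patch balances long-running instructions across the two sides of the
FXU and VFU execution units.  The scheduler lowers the score of a
long-running instruction when the current side is already occupied by one,
so that such instructions are less likely to clog the pipeline.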


gcc/ChangeLog:

2017-10-11  Robin Dapp  <rd...@linux.vnet.ibm.com>

        * config/s390/s390.c (NUM_SIDES): New constant.
        (LONGRUNNING_THRESHOLD): New constant.
        (LATENCY_FACTOR): New constant.
        (s390_sched_score): Lower score for long-running instructions on same
        side.
        (s390_sched_variable_issue): Bookkeeping for long-running instructions.


---
 gcc/config/s390/s390.c | 64 +++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 63 insertions(+), 1 deletion(-)

diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c
index 36bc67d..2430933 100644
--- a/gcc/config/s390/s390.c
+++ b/gcc/config/s390/s390.c
@@ -355,6 +355,18 @@ static rtx_insn *last_scheduled_insn;
 #define MAX_SCHED_UNITS 3
 static int last_scheduled_unit_distance[MAX_SCHED_UNITS];
 
+#define NUM_SIDES 2
+static int current_side = 1;
+#define LONGRUNNING_THRESHOLD 5
+
+/* Estimate of number of cycles a long-running insn occupies an
+   execution unit.  */
+static unsigned fxu_longrunning[NUM_SIDES];
+static unsigned vfu_longrunning[NUM_SIDES];
+
+/* Factor to scale latencies by, determined by measurements.  */
+#define LATENCY_FACTOR 4
+
 /* The maximum score added for an instruction whose unit hasn't been
    in use for MAX_SCHED_MIX_DISTANCE steps.  Increase this value to
    give instruction mix scheduling more priority over instruction
@@ -14483,7 +14495,24 @@ s390_sched_score (rtx_insn *insn)
        if (m & unit_mask)
          score += (last_scheduled_unit_distance[i] * MAX_SCHED_MIX_SCORE /
                    MAX_SCHED_MIX_DISTANCE);
+
+      unsigned latency = insn_default_latency (insn);
+
+      int other_side = 1 - current_side;
+
+      /* Try to delay long-running insns when side is busy.  */
+      if (latency > LONGRUNNING_THRESHOLD)
+       {
+         if (get_attr_z13_unit_fxu (insn) && fxu_longrunning[current_side]
+             && fxu_longrunning[other_side] <= fxu_longrunning[current_side])
+           score = MAX (0, score - 10);
+
+         if (get_attr_z13_unit_vfu (insn) && vfu_longrunning[current_side]
+             && vfu_longrunning[other_side] <= vfu_longrunning[current_side])
+           score = MAX (0, score - 10);
+       }
     }
+
   return score;
 }
 
@@ -14602,6 +14631,8 @@ s390_sched_variable_issue (FILE *file, int verbose, rtx_insn *insn, int more)
 {
   last_scheduled_insn = insn;
 
+  bool starts_group = false;
+
   if (s390_tune >= PROCESSOR_2827_ZEC12
       && reload_completed
       && recog_memoized (insn) >= 0)
@@ -14609,6 +14640,11 @@ s390_sched_variable_issue (FILE *file, int verbose, rtx_insn *insn, int more)
       unsigned int mask = s390_get_sched_attrmask (insn);
 
       if ((mask & S390_SCHED_ATTR_MASK_CRACKED) != 0
+         || (mask & S390_SCHED_ATTR_MASK_EXPANDED) != 0
+         || (mask & S390_SCHED_ATTR_MASK_GROUPALONE) != 0)
+       starts_group = true;
+
+      if ((mask & S390_SCHED_ATTR_MASK_CRACKED) != 0
          || (mask & S390_SCHED_ATTR_MASK_EXPANDED) != 0)
        s390_sched_state = S390_SCHED_STATE_CRACKED;
       else if ((mask & S390_SCHED_ATTR_MASK_ENDGROUP) != 0
@@ -14623,8 +14659,13 @@ s390_sched_variable_issue (FILE *file, int verbose, rtx_insn *insn, int more)
            case 1:
            case 2:
            case S390_SCHED_STATE_NORMAL:
+             if (s390_sched_state == 0)
+               starts_group = true;
              if (s390_sched_state == S390_SCHED_STATE_NORMAL)
-               s390_sched_state = 1;
+               {
+                 starts_group = true;
+                 s390_sched_state = 1;
+               }
              else
                s390_sched_state++;
 
@@ -14650,6 +14691,27 @@ s390_sched_variable_issue (FILE *file, int verbose, rtx_insn *insn, int more)
              last_scheduled_unit_distance[i]++;
        }
 
+      /* If this insn started a new group, the side flipped.  */
+      if (starts_group)
+       current_side = current_side ? 0 : 1;
+
+      for (int i = 0; i < 2; i++)
+       {
+         if (fxu_longrunning[i] >= 1)
+           fxu_longrunning[i] -= 1;
+         if (vfu_longrunning[i] >= 1)
+           vfu_longrunning[i] -= 1;
+       }
+
+      unsigned latency = insn_default_latency (insn);
+      if (latency > LONGRUNNING_THRESHOLD)
+       {
+         if (get_attr_z13_unit_fxu (insn))
+           fxu_longrunning[current_side] = latency * LATENCY_FACTOR;
+         else
+           vfu_longrunning[current_side] = latency * LATENCY_FACTOR;
+       }
+
       if (verbose > 5)
        {
          unsigned int sched_mask;
-- 
2.9.4

Reply via email to