Hi.
This patch makes the scheduler prefer the instruction with the higher cost when
two given instructions are otherwise equally good.
Issuing more restricted instructions first is particularly useful on in-order
cores because it increases the number of dual-issue opportunities.
For example, on AArch64, instead of:
add x11, x2, 96
mov x4, x2
mov w10, 1
ldrh w5, [x0]
ldrh w13, [x0, 2]
ldrh w9, [x0, 4]
ldrh w12, [x0, 6]
b .L759
Generate:
ldrh w5, [x0]
add x11, x2, 96
ldrh w13, [x0, 2]
mov x4, x2
ldrh w9, [x0, 4]
mov w10, 1
ldrh w12, [x0, 6]
b .L759
Bootstrapped and regtested on aarch64-none-linux-gnu and there are no
regressions.
Ok for trunk?
Thanks,
Vlad
gcc/
ChangeLog for gcc/ChangeLog
2018-09-11 Vlad Lazar <[email protected]>
* haifa-sched.c (rank_for_schedule): Schedule by INSN_COST.
(rfs_decision): New scheduling decision RFS_COST.
(rfs_str): Add the corresponding "RFS_COST" string.
diff --git a/gcc/haifa-sched.c b/gcc/haifa-sched.c
index 4f0221f6f43..3095e0375b5 100644
--- a/gcc/haifa-sched.c
+++ b/gcc/haifa-sched.c
@@ -2542,7 +2542,7 @@ enum rfs_decision {
RFS_SCHED_GROUP, RFS_PRESSURE_DELAY, RFS_PRESSURE_TICK,
RFS_FEEDS_BACKTRACK_INSN, RFS_PRIORITY, RFS_SPECULATION,
RFS_SCHED_RANK, RFS_LAST_INSN, RFS_PRESSURE_INDEX,
- RFS_DEP_COUNT, RFS_TIE, RFS_FUSION, RFS_N };
+ RFS_DEP_COUNT, RFS_TIE, RFS_FUSION, RFS_COST, RFS_N };
/* Corresponding strings for print outs. */
static const char *rfs_str[RFS_N] = {
@@ -2550,7 +2550,7 @@ static const char *rfs_str[RFS_N] = {
"RFS_SCHED_GROUP", "RFS_PRESSURE_DELAY", "RFS_PRESSURE_TICK",
"RFS_FEEDS_BACKTRACK_INSN", "RFS_PRIORITY", "RFS_SPECULATION",
"RFS_SCHED_RANK", "RFS_LAST_INSN", "RFS_PRESSURE_INDEX",
- "RFS_DEP_COUNT", "RFS_TIE", "RFS_FUSION" };
+ "RFS_DEP_COUNT", "RFS_TIE", "RFS_FUSION", "RFS_COST" };
/* Statistical breakdown of rank_for_schedule decisions. */
struct rank_for_schedule_stats_t { unsigned stats[RFS_N]; };
@@ -2803,6 +2803,14 @@ rank_for_schedule (const void *x, const void *y)
if (flag_sched_dep_count_heuristic && val != 0)
return rfs_result (RFS_DEP_COUNT, val, tmp, tmp2);
+ /* Sort by INSN_COST before falling back to INSN_LUID. This means that
+ instructions which take longer to execute are prioritised, which leads to
+ more dual-issue opportunities on in-order cores which have this feature. */
+
+ if (INSN_COST (tmp) != INSN_COST (tmp2))
+ return rfs_result (RFS_COST, INSN_COST (tmp2) - INSN_COST (tmp),
+ tmp, tmp2);
+
/* If insns are equally good, sort by INSN_LUID (original insn order),
so that we make the sort stable. This minimizes instruction movement,
thus minimizing sched's effect on debugging and cross-jumping. */