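This patch introduces a conservative loop unrolling heuristic for the
RISC-V backend, controlled by the new -munroll-only-small-loops option.
With the option in effect, only loops of at most 4 insns are unrolled,
by a factor of at most 8; larger loops are left alone.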
The option is enabled by default at -O2 and above for speed-optimized
builds, together with -funroll-loops, so that small loops benefit from
unrolling without exposing larger loops to its costs. When the user
explicitly passes -funroll-loops or -funroll-all-loops,
-munroll-only-small-loops is automatically disabled (unless it was
itself given explicitly) so that the full unroller heuristics apply as
before.
gcc/ChangeLog:
* common/config/riscv/riscv-common.cc
(riscv_option_optimization_table): Enable -funroll-loops and
-munroll-only-small-loops at -O2 and above for speed.
* config/riscv/riscv.cc (riscv_loop_unroll_adjust): New function.
(riscv_option_override): Disable -munroll-only-small-loops when
-funroll-loops or -funroll-all-loops is explicitly requested.
(TARGET_LOOP_UNROLL_ADJUST): Define.
* config/riscv/riscv.opt (munroll-only-small-loops): New option.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/unroll-small-loop.c: New test.
* gcc.target/riscv/unroll-large-loop.c: New test.
* gcc.target/riscv/unroll-explicit.c: New test.
---
gcc/common/config/riscv/riscv-common.cc | 4 +++
gcc/config/riscv/riscv.cc | 27 +++++++++++++++++++
gcc/config/riscv/riscv.opt | 4 +++
.../gcc.target/riscv/unroll-explicit.c | 21 +++++++++++++++
.../gcc.target/riscv/unroll-large-loop.c | 20 ++++++++++++++
.../gcc.target/riscv/unroll-small-loop.c | 22 +++++++++++++++
6 files changed, 98 insertions(+)
create mode 100644 gcc/testsuite/gcc.target/riscv/unroll-explicit.c
create mode 100644 gcc/testsuite/gcc.target/riscv/unroll-large-loop.c
create mode 100644 gcc/testsuite/gcc.target/riscv/unroll-small-loop.c
diff --git a/gcc/common/config/riscv/riscv-common.cc b/gcc/common/config/riscv/riscv-common.cc
index 74929381a06..cc6988caef4 100644
--- a/gcc/common/config/riscv/riscv-common.cc
+++ b/gcc/common/config/riscv/riscv-common.cc
@@ -2344,6 +2344,10 @@ static const struct default_options riscv_option_optimization_table[] =
{ OPT_LEVELS_ALL, OPT_fasynchronous_unwind_tables, NULL, 1 },
{ OPT_LEVELS_ALL, OPT_funwind_tables, NULL, 1},
#endif
+ /* Turn on -funroll-loops with -munroll-only-small-loops to enable
+ small loop unrolling at -O2 and above when optimizing for speed. */
+ { OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_funroll_loops, NULL, 1 },
+ { OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_munroll_only_small_loops, NULL, 1 },
{ OPT_LEVELS_NONE, 0, NULL, 0 }
};
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index cc1b8cd16a9..f2c3f8e6eff 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -5055,6 +5055,21 @@ riscv_insn_cost (rtx_insn *insn, bool speed)
return cost;
}
+/* Implement TARGET_LOOP_UNROLL_ADJUST. */
+
+static unsigned
+riscv_loop_unroll_adjust (unsigned nunroll, class loop *loop)
+{
+ if (riscv_unroll_only_small_loops) /* -munroll-only-small-loops in effect. */
+ {
+ if (loop->ninsns <= 4) /* Small loop: cap the unroll factor at 8. */
+ return MIN (8, nunroll);
+ else
+ return 1; /* Larger loop: a factor of 1 leaves it not unrolled. */
+ }
+ return nunroll; /* Option off: keep the NUNROLL passed in by the caller. */
+}
+
/* Implement TARGET_MAX_NOCE_IFCVT_SEQ_COST. Like the default implementation,
but we consider cost units of branch instructions equal to cost units of
other instructions. */
@@ -12139,6 +12154,16 @@ riscv_option_override (void)
flag_pcc_struct_return = 0;
+ /* Explicit -funroll-loops or -funroll-all-loops turns
+ -munroll-only-small-loops off (unless it was itself given
+ explicitly), lifting the conservative small-loop restriction. */
+ if ((OPTION_SET_P (flag_unroll_loops) && flag_unroll_loops)
+ || (OPTION_SET_P (flag_unroll_all_loops) && flag_unroll_all_loops))
+ {
+ if (!OPTION_SET_P (riscv_unroll_only_small_loops))
+ riscv_unroll_only_small_loops = 0;
+ }
+
if (flag_pic)
g_switch_value = 0;
@@ -16364,6 +16389,8 @@ riscv_memtag_tag_bitsize ()
#define TARGET_RTX_COSTS riscv_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST riscv_address_cost
+#undef TARGET_LOOP_UNROLL_ADJUST
+#define TARGET_LOOP_UNROLL_ADJUST riscv_loop_unroll_adjust
#undef TARGET_INSN_COST
#define TARGET_INSN_COST riscv_insn_cost
diff --git a/gcc/config/riscv/riscv.opt b/gcc/config/riscv/riscv.opt
index c2670ad87b2..3226367bf8a 100644
--- a/gcc/config/riscv/riscv.opt
+++ b/gcc/config/riscv/riscv.opt
@@ -454,3 +454,7 @@ Enum(arcv_mpy_option) String(10c) Value(ARCV_MPY_OPTION_10C)
mmpy-option=
Target RejectNegative Joined Enum(arcv_mpy_option) Var(arcv_mpy_option) Init(ARCV_MPY_OPTION_2C)
The type of MPY unit used by the RMX-100 core (to be used in combination with -mtune=arc-v-rmx-100-series) (default: 2c).
+
+munroll-only-small-loops
+Target Var(riscv_unroll_only_small_loops) Init(0) Save
+Enable conservative small loop unrolling.
diff --git a/gcc/testsuite/gcc.target/riscv/unroll-explicit.c b/gcc/testsuite/gcc.target/riscv/unroll-explicit.c
new file mode 100644
index 00000000000..b5e537d20c9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/unroll-explicit.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -funroll-loops -fdump-rtl-loop2_unroll-details" } */
+
+/* Verify that when the user explicitly passes -funroll-loops,
+ -munroll-only-small-loops is disabled and large loops can also
+ be unrolled. */
+
+void
+large_loop_explicit (int *a, int *b, int *c, int *d, int n)
+{
+ int i;
+ for (i = 0; i < n; i++)
+ {
+ a[i] = b[i] + c[i];
+ d[i] = a[i] * b[i] - c[i];
+ b[i] = c[i] + d[i] + a[i];
+ c[i] = a[i] - d[i] + b[i];
+ }
+}
+
+/* { dg-final { scan-rtl-dump "Unrolled loop" "loop2_unroll" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/unroll-large-loop.c b/gcc/testsuite/gcc.target/riscv/unroll-large-loop.c
new file mode 100644
index 00000000000..ad470c34fdd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/unroll-large-loop.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-rtl-loop2_unroll-details" } */
+
+/* Verify that a large loop (more than 4 insns in the body) is NOT unrolled
+ when -munroll-only-small-loops is in effect (the default at -O2). */
+
+void
+large_loop (int *a, int *b, int *c, int *d, int n)
+{
+ int i;
+ for (i = 0; i < n; i++)
+ {
+ a[i] = b[i] + c[i];
+ d[i] = a[i] * b[i] - c[i];
+ b[i] = c[i] + d[i] + a[i];
+ c[i] = a[i] - d[i] + b[i];
+ }
+}
+
+/* { dg-final { scan-rtl-dump-not "Unrolled loop" "loop2_unroll" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/unroll-small-loop.c b/gcc/testsuite/gcc.target/riscv/unroll-small-loop.c
new file mode 100644
index 00000000000..49dcfa04e41
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/unroll-small-loop.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-rtl-loop2_unroll-details" } */
+
+/* Verify -munroll-only-small-loops (default ON at -O2) unrolls a small
+ loop. The do-while form uses the counter itself as the induction
+ variable, so the RTL loop body collapses to roughly:
+ asm ; <-- empty volatile asm (kept as one insn)
+ addi n, n, -1
+ bnez n, .L
+ giving loop->ninsns <= 4 and triggering the small-loop unroll path
+ in riscv_loop_unroll_adjust. The empty volatile asm prevents the
+ loop from being deleted as dead code. */
+
+void
+small_loop (int n)
+{
+ do
+ __asm__ volatile ("");
+ while (--n);
+}
+
+/* { dg-final { scan-rtl-dump "Unrolled loop" "loop2_unroll" } } */
--
2.39.5 (Apple Git-154)