When we split an argument between the stack and registers we might end
up with a misaligned access, so use the newly implemented hook to bias
the codegen towards the registers rather than the stack.
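
For example, with -march=rv32ifd -mabi=ilp32d the eight float arguments
take fa0-fa7 and the first seven ints take a0-a6, so a following double is
split between a7 and the first stack slot.  A sketch of the situation
(illustrative identifiers, mirroring the new testcase below):

    double f (float f0, float f1, float f2, float f3, float f4, float f5,
              float f6, float f7, int i0, int i1, int i2, int i3, int i4,
              int i5, int i6, double split, double unused)
    {
      /* Reassembling "split" on the stack can land it in a slot that is
         only 4-byte aligned, hence the misaligned 8-byte access.  */
      return split;
    }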

        PR/82106

gcc/ChangeLog:

        * config/riscv/riscv.cc (struct riscv_arg_info): Add ap_offset.
        (riscv_get_arg_info): Set ap_offset.
        (riscv_arg_extended_on_stack): New function.
        (TARGET_ARG_EXTENDED_ON_STACK): Define.

gcc/testsuite/ChangeLog:

        * gcc.target/riscv/pr82106.c: New test.
---
 gcc/config/riscv/riscv.cc                | 30 ++++++++++++++++++++++++
 gcc/testsuite/gcc.target/riscv/pr82106.c | 12 ++++++++++
 2 files changed, 42 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/pr82106.c

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index bbc7547d385..4c922f8fca3 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -247,6 +247,9 @@ struct riscv_arg_info {
 
   /* The offset of the first register used, provided num_mrs is nonzero.  */
   unsigned int mr_offset;
+
+  /* The offset from the (virtual) arg pointer of this argument, if it is on the stack.  */
+  unsigned int ap_offset;
 };
 
 /* One stage in a constant building sequence.  These sequences have
@@ -6335,6 +6338,7 @@ riscv_get_arg_info (struct riscv_arg_info *info, const CUMULATIVE_ARGS *cum,
   info->num_fprs = 0;
   info->num_gprs = MIN (num_words, MAX_ARGS_IN_REGISTERS - info->gpr_offset);
   info->stack_p = (num_words - info->num_gprs) != 0;
+  info->ap_offset = (num_words - info->num_gprs) * UNITS_PER_WORD;
 
   if (info->num_gprs || return_p)
     return gen_rtx_REG (mode, gpr_base + info->gpr_offset);
@@ -6405,6 +6409,30 @@ riscv_arg_partial_bytes (cumulative_args_t cum,
   return arg.stack_p ? arg.num_gprs * UNITS_PER_WORD : 0;
 }
 
+/* Implement TARGET_ARG_EXTENDED_ON_STACK.  */
+
+static int
+riscv_arg_extended_on_stack (cumulative_args_t cum,
+                            const function_arg_info &generic_arg)
+{
+  struct riscv_arg_info arg;
+  poly_int64 mode_size;
+
+  /* For machines with fast unaligned accesses we're always better off
+     handling the (possibly misaligned) access in place.  */
+  if (! riscv_slow_unaligned_access_p)
+    return 1;
+
+  riscv_get_arg_info (&arg, get_cumulative_args (cum), generic_arg.mode,
+                     generic_arg.type, generic_arg.named, false);
+
+  mode_size = GET_MODE_SIZE (generic_arg.mode);
+  gcc_assert (mode_size.is_constant ());
+  /* This assumes the arg pointer is aligned to the type size; that may not
+     hold for the 32-bit embedded (ilp32e) ABI.  */
+  return (arg.ap_offset % mode_size.to_constant ()) == 0;
+}
+
 /* Implement FUNCTION_VALUE and LIBCALL_VALUE.  For normal calls,
    VALTYPE is the return type and MODE is VOIDmode.  For libcalls,
    VALTYPE is null and MODE is the mode of the return value.  */
@@ -14881,6 +14909,8 @@ synthesize_and (rtx operands[3])
 #define TARGET_PASS_BY_REFERENCE riscv_pass_by_reference
 #undef TARGET_ARG_PARTIAL_BYTES
 #define TARGET_ARG_PARTIAL_BYTES riscv_arg_partial_bytes
+#undef TARGET_ARG_EXTENDED_ON_STACK
+#define TARGET_ARG_EXTENDED_ON_STACK riscv_arg_extended_on_stack
 #undef TARGET_FUNCTION_ARG
 #define TARGET_FUNCTION_ARG riscv_function_arg
 #undef TARGET_FUNCTION_ARG_ADVANCE
diff --git a/gcc/testsuite/gcc.target/riscv/pr82106.c b/gcc/testsuite/gcc.target/riscv/pr82106.c
new file mode 100644
index 00000000000..7bcfbaf8723
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/pr82106.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32ifd -mabi=ilp32d -O2 -mtune=generic" } */
+
+double mla(float fa0, float fa1, float fa2, float fa3, float fa4, float fa5,
+           float fa6, float fa7, int a0, int a1, int a2, int a3, int a4,
+           int a5, int a6, double a7_s0, double unused)
+{
+  return a7_s0;
+}
+
+/* { dg-final { scan-assembler-not "fld\tfa0,12\\(sp\\)" } } */
+/* { dg-final { scan-assembler-times "fld\tfa0,8\\(sp\\)" 1 } } */
-- 
2.39.5 (Apple Git-154)
