When we split an argument between the stack and registers we might end up with a misaligned access, so use this newly implemented hook to instead bias the codegen towards the registers rather than the stack.
PR/82106

gcc/ChangeLog:

	* config/riscv/riscv.cc (struct riscv_arg_info): Add ap_offset.
	(riscv_get_arg_info): Set ap_offset.
	(riscv_arg_extended_on_stack): New hook.
	(TARGET_ARG_EXTENDED_ON_STACK): Likewise.
---
 gcc/config/riscv/riscv.cc                | 30 ++++++++++++++++++++++++
 gcc/testsuite/gcc.target/riscv/pr82106.c | 12 ++++++++++
 2 files changed, 42 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/pr82106.c

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index bbc7547d385..4c922f8fca3 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -247,6 +247,9 @@ struct riscv_arg_info {
 
   /* The offset of the first register used, provided num_mrs is nonzero.  */
   unsigned int mr_offset;
+
+  /* The offset from the (virtual) arg pointer of this argument, if it is on the stack.  */
+  unsigned int ap_offset;
 };
 
 /* One stage in a constant building sequence.  These sequences have
@@ -6335,6 +6338,7 @@ riscv_get_arg_info (struct riscv_arg_info *info, const CUMULATIVE_ARGS *cum,
   info->num_fprs = 0;
   info->num_gprs = MIN (num_words, MAX_ARGS_IN_REGISTERS - info->gpr_offset);
   info->stack_p = (num_words - info->num_gprs) != 0;
+  info->ap_offset = (num_words - info->num_gprs) * UNITS_PER_WORD;
 
   if (info->num_gprs || return_p)
     return gen_rtx_REG (mode, gpr_base + info->gpr_offset);
@@ -6405,6 +6409,30 @@ riscv_arg_partial_bytes (cumulative_args_t cum,
   return arg.stack_p ? arg.num_gprs * UNITS_PER_WORD : 0;
 }
 
+/* Implement TARGET_ARG_EXTENDED_ON_STACK.  */
+
+static int
+riscv_arg_extended_on_stack (cumulative_args_t cum,
+			     const function_arg_info &generic_arg)
+{
+  struct riscv_arg_info arg;
+  poly_int64 mode_size;
+
+  /* For machines with fast unaligned accesses we'll always be better off
+     mangling the access in place.  */
+  if (! riscv_slow_unaligned_access_p)
+    return 1;
+
+  riscv_get_arg_info (&arg, get_cumulative_args (cum), generic_arg.mode,
+		      generic_arg.type, generic_arg.named, false);
+
+  mode_size = GET_MODE_SIZE (generic_arg.mode);
+  gcc_assert (mode_size.is_constant ());
+  /* This assumes the arg pointer is aligned to the type size.  IIRC this isn't
+     true for the 32-bit embedded ABI, but I don't remember if we implemented that.  */
+  return (arg.ap_offset % mode_size.to_constant ()) == 0;
+}
+
 /* Implement FUNCTION_VALUE and LIBCALL_VALUE.  For normal calls,
    VALTYPE is the return type and MODE is VOIDmode.  For libcalls,
    VALTYPE is null and MODE is the mode of the return value.  */
@@ -14881,6 +14909,8 @@ synthesize_and (rtx operands[3])
 #define TARGET_PASS_BY_REFERENCE riscv_pass_by_reference
 #undef TARGET_ARG_PARTIAL_BYTES
 #define TARGET_ARG_PARTIAL_BYTES riscv_arg_partial_bytes
+#undef TARGET_ARG_EXTENDED_ON_STACK
+#define TARGET_ARG_EXTENDED_ON_STACK riscv_arg_extended_on_stack
 #undef TARGET_FUNCTION_ARG
 #define TARGET_FUNCTION_ARG riscv_function_arg
 #undef TARGET_FUNCTION_ARG_ADVANCE
diff --git a/gcc/testsuite/gcc.target/riscv/pr82106.c b/gcc/testsuite/gcc.target/riscv/pr82106.c
new file mode 100644
index 00000000000..7bcfbaf8723
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/pr82106.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32ifd -mabi=ilp32d -O2 -mtune=generic" } */
+
+double mla(float fa0, float fa1, float fa2, float fa3, float fa4, float fa5,
+    float fa6, float fa7, int a0, int a1, int a2, int a3, int a4, int a5, int
+    a6, double a7_s0, double unused)
+{
+  return a7_s0;
+}
+
+/* { dg-final { scan-assembler-not "fld\tfa0,12\\(sp\\)" } } */
+/* { dg-final { scan-assembler-times "fld\tfa0,8\\(sp\\)" 1 } } */
-- 
2.39.5 (Apple Git-154)