Hi,
this is the last issue with nested APCS frames according to our testing. When
the IP register needs to be preserved on entry and r3 isn't free and there are
no arguments to push, the prologue creates a slot above the frame, so various
internal offsets need to be adjusted. One has been missed, leading to:
sub ip, fp, #20
fldmfdd ip!, {d8}
sub sp, fp, #16
ldmfd sp, {r3, fp, sp, pc}
in the epilogue of the nested frame. That's wrong because the difference
between the two immediates must be equal to the size of the saved FP registers
(8 bytes for d8 here, whereas 20 - 16 is only 4).
Tested on ARM/VxWorks (where it fixes several ACATS tests at -O2) and
ARM/EABI.  OK for the mainline?
2013-12-22 Eric Botcazou <ebotca...@adacore.com>
* config/arm/arm.c (arm_get_frame_offsets): Revamp long lines.
(arm_expand_epilogue_apcs_frame): Take into account the number of bytes
used to save the static chain register in the computation of the offset
from which the FP registers need to be restored.
2013-12-22 Eric Botcazou <ebotca...@adacore.com>
* gcc.target/arm/neon-nested-apcs.c: New test.
--
Eric Botcazou
Index: config/arm/arm.c
===================================================================
--- config/arm/arm.c (revision 206161)
+++ config/arm/arm.c (working copy)
@@ -20316,8 +20316,10 @@ arm_get_frame_offsets (void)
offsets->saved_args = crtl->args.pretend_args_size;
/* In Thumb mode this is incorrect, but never used. */
- offsets->frame = offsets->saved_args + (frame_pointer_needed ? 4 : 0) +
- arm_compute_static_chain_stack_bytes();
+ offsets->frame
+ = offsets->saved_args
+ + arm_compute_static_chain_stack_bytes ()
+ + (frame_pointer_needed ? 4 : 0);
if (TARGET_32BIT)
{
@@ -20357,9 +20359,10 @@ arm_get_frame_offsets (void)
}
/* Saved registers include the stack frame. */
- offsets->saved_regs = offsets->saved_args + saved +
- arm_compute_static_chain_stack_bytes();
+ offsets->saved_regs
+ = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
+
/* A leaf function does not need any stack alignment if it has nothing
on the stack. */
if (leaf && frame_size == 0
@@ -27044,7 +27047,10 @@ arm_expand_epilogue_apcs_frame (bool rea
saved_regs_mask = offsets->saved_regs_mask;
/* Find the offset of the floating-point save area in the frame. */
- floats_from_frame = offsets->saved_args - offsets->frame;
+ floats_from_frame
+ = offsets->saved_args
+ + arm_compute_static_chain_stack_bytes ()
+ - offsets->frame;
/* Compute how many core registers saved and how far away the floats are. */
for (i = 0; i <= LAST_ARM_REGNUM; i++)
/* { dg-do run } */
/* { dg-require-effective-target arm_neon_hw } */
/* { dg-options "-fno-omit-frame-pointer -mapcs-frame -O -mfloat-abi=softfp -mfpu=neon" } */
extern void abort (void);
/* Global written by BAR; it is never read back in this test.  */
float data;
/* Store F into the global DATA.  The function is marked noinline/noclone,
   so calls to it from NESTED remain real out-of-line calls.  */
void __attribute__((noinline, noclone)) bar (float f)
{
data = f;
}
/* Call the nested function NESTED once with constant arguments and return
   F plus the local ERROR_REPORTED.  With the constants used below the
   guarded branch in NESTED is not taken, so ERROR_REPORTED stays 0 and the
   value returned is F itself.  */
float __attribute__((noinline, noclone)) foo (float f)
{
int error_reported = 0;
/* NESTED is a GNU C nested function that writes ERROR_REPORTED, a local of
   the enclosing function FOO.
   NOTE(review): presumably this access is what makes NESTED need the
   static chain, and the float values kept live across the calls to BAR
   are what cause FP registers to be saved in its frame -- confirm against
   the generated assembly.  */
void __attribute__((noinline, noclone))
nested (int a, int b, int c, int d, float f0, float f1, float f2, float f3)
{
float e;
/* E becomes F3 if it is greater than F2, otherwise F2.  */
if (f3 > f2)
e = f3;
else
e = f2;
/* Only when F0 exceeds F1 by more than E: record the sum of the integer
   arguments in the parent's ERROR_REPORTED and pass F0 and E to BAR.  */
if (f0 - f1 > e)
{
error_reported = a + b + c + d;
bar (f0);
bar (e);
}
}
/* Here F0 - F1 is 0 and E is 4.2, so the branch above is not executed.  */
nested (1, 2, 3, 4, 1.0, 1.0, 3.5, 4.2);
return f + (float)error_reported;
}
/* Arbitrary float value passed through FOO and compared on return.  */
#define PI 3.1415927f
/* Run FOO and abort unless it returns its argument unchanged.  */
int main (void)
{
if (foo (PI) != PI)
abort ();
return 0;
}