Hi,

this is the last issue with nested APCS frames according to our testing.  When 
the IP register needs to be preserved on entry and r3 isn't free and there are
no arguments to push, the prologue creates a slot above the frame, so various 
internal offsets need to be adjusted.  One has been missed, leading to:

        sub     ip, fp, #20
        fldmfdd ip!, {d8}
        sub     sp, fp, #16
        ldmfd   sp, {r3, fp, sp, pc}

in the epilogue of the nested frame.  That's wrong because the difference 
between the two immediates must be equal to the size of the saved FP registers:
here they differ by only 4 bytes, whereas the saved register d8 occupies 8.

Tested on ARM/VxWorks (where it fixes several ACATS tests at -O2) and 
ARM/EABI, OK for the mainline?


2013-12-22  Eric Botcazou  <ebotca...@adacore.com>

        * config/arm/arm.c (arm_get_frame_offsets): Revamp long lines.
        (arm_expand_epilogue_apcs_frame): Take into account the number of bytes
        used to save the static chain register in the computation of the offset
        from which the FP registers need to be restored.


2013-12-22  Eric Botcazou  <ebotca...@adacore.com>

        * gcc.target/arm/neon-nested-apcs.c: New test.


-- 
Eric Botcazou
Index: config/arm/arm.c
===================================================================
--- config/arm/arm.c	(revision 206161)
+++ config/arm/arm.c	(working copy)
@@ -20316,8 +20316,10 @@ arm_get_frame_offsets (void)
   offsets->saved_args = crtl->args.pretend_args_size;
 
   /* In Thumb mode this is incorrect, but never used.  */
-  offsets->frame = offsets->saved_args + (frame_pointer_needed ? 4 : 0) +
-                   arm_compute_static_chain_stack_bytes();
+  offsets->frame
+    = offsets->saved_args
+	+ arm_compute_static_chain_stack_bytes ()
+	+ (frame_pointer_needed ? 4 : 0);
 
   if (TARGET_32BIT)
     {
@@ -20357,9 +20359,10 @@ arm_get_frame_offsets (void)
     }
 
   /* Saved registers include the stack frame.  */
-  offsets->saved_regs = offsets->saved_args + saved +
-                        arm_compute_static_chain_stack_bytes();
+  offsets->saved_regs
+    = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
   offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
+
   /* A leaf function does not need any stack alignment if it has nothing
      on the stack.  */
   if (leaf && frame_size == 0
@@ -27044,7 +27047,10 @@ arm_expand_epilogue_apcs_frame (bool rea
   saved_regs_mask = offsets->saved_regs_mask;
 
   /* Find the offset of the floating-point save area in the frame.  */
-  floats_from_frame = offsets->saved_args - offsets->frame;
+  floats_from_frame
+    = offsets->saved_args
+	+ arm_compute_static_chain_stack_bytes ()
+	- offsets->frame;
 
   /* Compute how many core registers saved and how far away the floats are.  */
   for (i = 0; i <= LAST_ARM_REGNUM; i++)
/* { dg-do run } */
/* { dg-require-effective-target arm_neon_hw } */
/* { dg-options "-fno-omit-frame-pointer -mapcs-frame -O -mfloat-abi=softfp -mfpu=neon" } */

extern void abort (void);

float data;

/* Store F into the global DATA.  Marked noinline/noclone so that every
   call remains a real out-of-line call in the caller.  */
void __attribute__((noinline, noclone)) bar (float f)
{
  data = f;
}

/* Call a GNU C nested function that takes four int and four float
   arguments and that can write to a local of this function, then return
   F plus that local converted to float.

   With the constant arguments used below the nested function never
   takes its error branch, so ERROR_REPORTED stays 0 and the function is
   expected to return F unchanged; main relies on this.  */
float __attribute__((noinline, noclone)) foo (float f)
{
  int error_reported = 0;

  /* Nested function: it reads its own eight parameters and, on the
     error path, writes ERROR_REPORTED, which lives in the enclosing
     function FOO.  */
  void __attribute__((noinline, noclone)) 
  nested (int a, int b, int c, int d, float f0, float f1, float f2, float f3)
  {
    float e;

    /* E is the larger of F2 and F3.  */
    if (f3 > f2)
      e = f3;
    else
      e = f2;

    /* Error path: record the sum of the integer arguments in the
       enclosing function's variable and pass F0 and E to BAR.  */
    if (f0 - f1 > e)
      {
	error_reported = a + b + c + d;
	bar (f0);
	bar (e);
      }
  }

  /* Here f0 - f1 is 0 and E is 4.2, so the error path is not taken.  */
  nested (1, 2, 3, 4, 1.0, 1.0, 3.5, 4.2);
  return f + (float)error_reported;
}

#define PI 3.1415927f

/* Test driver: foo must hand back its argument unchanged; any other
   result aborts the program.  */
int main (void)
{
  float result = foo (PI);

  if (result == PI)
    return 0;

  abort ();
  return 0;
}

Reply via email to