Hi,

the attached patch fixes two issues with the TX backend optimization
trying to get rid of FPR save/restore operations in some cases.

The first is that the stack pointer might get clobbered on 64 bit if
the backend was able to get rid of all the FPR saves/restores and
these were the only things requiring stack space. For more details
please see the testcase comment.

The second is that the optimization did not work as expected on 31 bit
since the loop setting the fpr save bits only for the 64 bit
call-saved FPRs made use of the call_clobbered information.

Both issues are covered by the new testcase.

Bootstrapped and regtested on s390 and s390x with --with-arch=zEC12.
No regressions.

I'm going to apply the patch to 4.8 and mainline after waiting a few
days for comments.

Bye,

-Andreas-

2013-10-04  Andreas Krebbel  <andreas.kreb...@de.ibm.com>

        * config/s390/s390.c (s390_register_info): Make the call-saved FPR
        loop to work also for 31bit ABI.
        Save the stack pointer for frame_size > 0.

2013-10-04  Andreas Krebbel  <andreas.kreb...@de.ibm.com>

        * gcc.target/s390/htm-nofloat-2.c: New testcase.

---
 gcc/config/s390/s390.c                        |   21 ----!!!!
 gcc/testsuite/gcc.target/s390/htm-nofloat-2.c |   55 ++++++++++++++++++++++++++
 2 files changed, 57 insertions(+), 8 deletions(-), 11 modifications(!)

Index: gcc/config/s390/s390.c
===================================================================
*** gcc/config/s390/s390.c.orig
--- gcc/config/s390/s390.c
*************** s390_register_info (int clobbered_regs[]
*** 7509,7516 ****
      {
        cfun_frame_layout.fpr_bitmap = 0;
        cfun_frame_layout.high_fprs = 0;
!       if (TARGET_64BIT)
!       for (i = FPR8_REGNUM; i <= FPR15_REGNUM; i++)
          /* During reload we have to use the df_regs_ever_live infos
             since reload is marking FPRs used as spill slots there as
             live before actually making the code changes.  Without
--- 7509,7519 ----
      {
        cfun_frame_layout.fpr_bitmap = 0;
        cfun_frame_layout.high_fprs = 0;
! 
!       for (i = FPR0_REGNUM; i <= FPR15_REGNUM; i++)
!       {
!         if (call_really_used_regs[i])
!           continue;
          /* During reload we have to use the df_regs_ever_live infos
             since reload is marking FPRs used as spill slots there as
             live before actually making the code changes.  Without
*************** s390_register_info (int clobbered_regs[]
*** 7523,7530 ****
              && !global_regs[i])
            {
              cfun_set_fpr_save (i);
!             cfun_frame_layout.high_fprs++;
            }
      }
  
    for (i = 0; i < 16; i++)
--- 7526,7536 ----
              && !global_regs[i])
            {
              cfun_set_fpr_save (i);
! 
!             if (i >= FPR8_REGNUM)
!               cfun_frame_layout.high_fprs++;
            }
+       }
      }
  
    for (i = 0; i < 16; i++)
*************** s390_register_info (int clobbered_regs[]
*** 7554,7559 ****
--- 7560,7566 ----
        || TARGET_TPF_PROFILING
        || cfun_save_high_fprs_p
        || get_frame_size () > 0
+       || (reload_completed && cfun_frame_layout.frame_size > 0)
        || cfun->calls_alloca
        || cfun->stdarg);
  
*************** s390_register_info (int clobbered_regs[]
*** 7652,7665 ****
            cfun_set_fpr_save (i + FPR0_REGNUM);
        }
      }
- 
-   if (!TARGET_64BIT)
-     {
-       if (df_regs_ever_live_p (FPR4_REGNUM) && !global_regs[FPR4_REGNUM])
-       cfun_set_fpr_save (FPR4_REGNUM);
-       if (df_regs_ever_live_p (FPR6_REGNUM) && !global_regs[FPR6_REGNUM])
-       cfun_set_fpr_save (FPR6_REGNUM);
-     }
  }
  
  /* Fill cfun->machine with info about frame of current function.  */
--- 7659,7664 ----
Index: gcc/testsuite/gcc.target/s390/htm-nofloat-2.c
===================================================================
*** /dev/null
--- gcc/testsuite/gcc.target/s390/htm-nofloat-2.c
***************
*** 0 ****
--- 1,55 ----
+ /* { dg-do run } */
+ /* { dg-options "-O3 -mhtm -Wa,-march=zEC12 --save-temps" } */
+ 
+ /* __builtin_tbegin has to emit clobbers for all FPRs since the tbegin
+    instruction does not automatically preserves them.  If the
+    transaction body is fully contained in a function the backend tries
+    after reload to get rid of the FPR save/restore operations
+    triggered by the clobbers.  This testcase failed since the backend
+    was able to get rid of all FPR saves/restores and since these were
+    the only stack operations also of the entire stack space.  So even
+    the save/restore of the stack pointer was omitted in the end.
+    However, since the frame layout has been fixed before, the prologue
+    still generated the stack pointer decrement making foo return with
+    a modified stack pointer.  */
+ 
+ void abort(void);
+ 
+ void __attribute__((noinline))
+ foo (int a)
+ {
+   /* This is just to prevent the tbegin code from actually being
+      executed.  That way the test may even run on machines prior to
+      zEC12.  */
+   if (a == 42)
+     return;
+ 
+   if (__builtin_tbegin (0) == 0)
+     __builtin_tend ();
+ }
+ 
+ #ifdef __s390x__
+ #define GET_STACK_POINTER(SP)                 \
+   asm volatile ("stg %%r15, %0" : "=QRST" (SP));
+ #else
+ #define GET_STACK_POINTER(SP)                 \
+   asm volatile ("st %%r15, %0" : "=QR" (SP));
+ #endif
+ 
+ int main(void)
+ {
+   unsigned long new_sp, old_sp;
+ 
+   GET_STACK_POINTER (old_sp);
+   foo(42);
+   GET_STACK_POINTER (new_sp);
+ 
+   if (old_sp != new_sp)
+     abort ();
+ 
+   return 0;
+ }
+ 
+ /* Make sure no FPR saves/restores are emitted.  */
+ /* { dg-final { scan-assembler-not "\tstd\t" } } */
+ /* { dg-final { scan-assembler-not "\tld\t" } } */

Reply via email to