Hello!

The attached patch optimizes the x86_64 gtm_jmpbuf layout to avoid copying
the return address. The optimized layout uses the same trick as x86_32: the
call-saved registers are stored just below the return address, and there is
just enough space in the call alignment hole to put all of them there.

2012-01-24  Uros Bizjak  <ubiz...@gmail.com>

        * config/x86/target.h (gtm_jmpbuf) [__x86_64__]: Move rip to the
        end of struct.
        * config/x86/sjlj.S (_ITM_beginTransaction) [__x86_64__]: Update
        offset values.  Do not copy return address.  Decrement stack
        by 56 bytes only.
        (GTM_longjmp) [__x86_64__]: Update offset values.

Tested on x86_64-pc-linux-gnu.

OK for mainline?

BTW: Do we really need the move at the end of GTM_longjmp:

        movq    48(%rsi), %r15
        movq    56(%rsi), %rdx
>>      movl    %edi, %eax
        cfi_def_cfa(%rcx, 0)
        cfi_register(%rip, %rdx)
        movq    %rcx, %rsp
        jmp     *%rdx

Uros.
Index: config/x86/sjlj.S
===================================================================
--- config/x86/sjlj.S   (revision 183449)
+++ config/x86/sjlj.S   (working copy)
@@ -61,20 +61,18 @@ SYM(_ITM_beginTransaction):
        cfi_startproc
 #ifdef __x86_64__
        leaq    8(%rsp), %rax
-       movq    (%rsp), %r8
-       subq    $72, %rsp
-       cfi_def_cfa_offset(80)
+       subq    $56, %rsp
+       cfi_def_cfa_offset(64)
        movq    %rax, (%rsp)
-       movq    %r8, 8(%rsp)
-       movq    %rbx, 16(%rsp)
-       movq    %rbp, 24(%rsp)
-       movq    %r12, 32(%rsp)
-       movq    %r13, 40(%rsp)
-       movq    %r14, 48(%rsp)
-       movq    %r15, 56(%rsp)
+       movq    %rbx, 8(%rsp)
+       movq    %rbp, 16(%rsp)
+       movq    %r12, 24(%rsp)
+       movq    %r13, 32(%rsp)
+       movq    %r14, 40(%rsp)
+       movq    %r15, 48(%rsp)
        movq    %rsp, %rsi
        call    SYM(GTM_begin_transaction)
-       addq    $72, %rsp
+       addq    $56, %rsp
        cfi_def_cfa_offset(8)
        ret
 #else
@@ -115,13 +113,13 @@ SYM(GTM_longjmp):
        cfi_startproc
 #ifdef __x86_64__
        movq    (%rsi), %rcx
-       movq    8(%rsi), %rdx
-       movq    16(%rsi), %rbx
-       movq    24(%rsi), %rbp
-       movq    32(%rsi), %r12
-       movq    40(%rsi), %r13
-       movq    48(%rsi), %r14
-       movq    56(%rsi), %r15
+       movq    8(%rsi), %rbx
+       movq    16(%rsi), %rbp
+       movq    24(%rsi), %r12
+       movq    32(%rsi), %r13
+       movq    40(%rsi), %r14
+       movq    48(%rsi), %r15
+       movq    56(%rsi), %rdx
        movl    %edi, %eax
        cfi_def_cfa(%rcx, 0)
        cfi_register(%rip, %rdx)
Index: config/x86/target.h
===================================================================
--- config/x86/target.h (revision 183449)
+++ config/x86/target.h (working copy)
@@ -29,13 +29,13 @@ namespace GTM HIDDEN {
 typedef struct gtm_jmpbuf
 {
   void *cfa;
-  unsigned long rip;
   unsigned long rbx;
   unsigned long rbp;
   unsigned long r12;
   unsigned long r13;
   unsigned long r14;
   unsigned long r15;
+  unsigned long rip;
 } gtm_jmpbuf;
 #else
 typedef struct gtm_jmpbuf

Reply via email to