------- Comment #10 from jakub at gcc dot gnu dot org  2007-11-22 17:04 -------
The remaining difference is a register allocation issue:

time ./pr23305-vanilla; time ./pr23305-fixed

real    0m4.030s
user    0m4.028s
sys     0m0.002s

real    0m1.593s
user    0m1.592s
sys     0m0.001s

with hand-edited changes:

--- pr23305-vanilla.s   2007-11-22 17:57:15.000000000 +0100
+++ pr23305-fixed.s     2007-11-22 17:57:56.000000000 +0100
@@ -95,49 +95,49 @@ _Z13s000005a_testv:
        subq    $24, %rsp
 .LCFI1:
        movq    _ZL3dpe(%rip), %rdx
        movq    _ZL3dpb(%rip), %rax
        cmpq    %rax, %rdx
        je      .L13
+       movabsq $4613937818241073152, %r8
        .p2align 4,,10
        .p2align 3
 .L14:
-       movabsq $4613937818241073152, %r8
        movq    %r8, (%rax)
        addq    $8, %rax
        cmpq    %rax, %rdx
        jne     .L14
 .L13:
        movq    _ZL3Dpe(%rip), %rdx
        movq    _ZL3Dpb(%rip), %rax
        cmpq    %rax, %rdx
        je      .L15
+       movabsq $4613937818241073152, %rdi
        .p2align 4,,10
        .p2align 3
 .L16:
-       movabsq $4613937818241073152, %rdi
        movq    %rdi, (%rax)
        addq    $8, %rax
        cmpq    %rax, %rdx
        jne     .L16
 .L15:
        movq    _ZL5rrDPe(%rip), %rdx
        movq    _ZL5rrDPb(%rip), %rax
        movsd   _ZL1D(%rip), %xmm0
        cmpq    %rdx, %rax
        movsd   %xmm0, 8(%rsp)
        je      .L18
+       movsd   8(%rsp), %xmm0
        .p2align 4,,10
        .p2align 3
 .L24:
-       movsd   8(%rsp), %xmm0
        addsd   (%rax), %xmm0
        addq    $8, %rax
        cmpq    %rax, %rdx
-       movsd   %xmm0, 8(%rsp)
        jne     .L24
+       movsd   %xmm0, 8(%rsp)
 .L18:
        movsd   8(%rsp), %xmm0
        ucomisd .LC2(%rip), %xmm0
        jp      .L23
        jne     .L23
        addq    $24, %rsp

In the lreg dump we have:

(code_label:HI 98 35 97 7 24 "" [1 uses])
(note:HI 97 98 45 7 [bb 7] NOTE_INSN_BASIC_BLOCK)
(insn:HI 45 97 46 7 pr23305.ii:28564 (set (reg/v:DF 64 [ result ])
        (plus:DF (reg/v:DF 64 [ result ])
            (mem/s:DF (reg:DI 58 [ ivtmp.254 ]) [29 <variable>.value+0 S8
A8]))) 680 {*fop_df_comm_sse} (nil))
(insn:HI 46 45 48 7 pr23305.ii:28564 (parallel [
            (set (reg:DI 58 [ ivtmp.254 ])
                (plus:DI (reg:DI 58 [ ivtmp.254 ])
                    (const_int 8 [0x8])))
            (clobber (reg:CC 17 flags))  
        ]) 244 {*adddi_1_rex64} (expr_list:REG_UNUSED (reg:CC 17 flags)
        (nil)))
(insn:HI 48 46 49 7 pr23305.ii:28673 (set (reg:CCZ 17 flags)
        (compare:CCZ (reg/f:DI 60 [ last$current$current$current ])
            (reg:DI 58 [ ivtmp.254 ]))) 2 {cmpdi_1_insn_rex64} (nil))
(jump_insn:HI 49 48 50 7 pr23305.ii:28673 (set (pc)
        (if_then_else (ne (reg:CCZ 17 flags)
                (const_int 0 [0x0]))
            (label_ref:DI 98)
            (pc))) 579 {*jcc_1} (expr_list:REG_DEAD (reg:CCZ 17 flags)
        (expr_list:REG_BR_PROB (const_int 9100 [0x238c])
            (nil))))

and
Register 64 pref SSE_FIRST_REG, else SSE_REGS
Register 64 used 5 times across 23 insns; set 2 times; user var; crosses 3
calls; pref SSE_FIRST_REG, else SSE_REGS.

Yet global alloc puts it into 8(%rsp), which is certainly fine, except in the
tight loop.


-- 

jakub at gcc dot gnu dot org changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
                 CC|                            |vmakarov at gcc dot gnu dot
                   |                            |org


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=23305

Reply via email to