------- Comment #10 from jakub at gcc dot gnu dot org 2007-11-22 17:04 -------
The remaining difference is a register allocation issue:
time ./pr23305-vanilla; time ./pr23305-fixed
real 0m4.030s
user 0m4.028s
sys 0m0.002s
real 0m1.593s
user 0m1.592s
sys 0m0.001s
with hand-edited changes:
--- pr23305-vanilla.s 2007-11-22 17:57:15.000000000 +0100
+++ pr23305-fixed.s 2007-11-22 17:57:56.000000000 +0100
@@ -95,49 +95,49 @@ _Z13s000005a_testv:
subq $24, %rsp
.LCFI1:
movq _ZL3dpe(%rip), %rdx
movq _ZL3dpb(%rip), %rax
cmpq %rax, %rdx
je .L13
+ movabsq $4613937818241073152, %r8
.p2align 4,,10
.p2align 3
.L14:
- movabsq $4613937818241073152, %r8
movq %r8, (%rax)
addq $8, %rax
cmpq %rax, %rdx
jne .L14
.L13:
movq _ZL3Dpe(%rip), %rdx
movq _ZL3Dpb(%rip), %rax
cmpq %rax, %rdx
je .L15
+ movabsq $4613937818241073152, %rdi
.p2align 4,,10
.p2align 3
.L16:
- movabsq $4613937818241073152, %rdi
movq %rdi, (%rax)
addq $8, %rax
cmpq %rax, %rdx
jne .L16
.L15:
movq _ZL5rrDPe(%rip), %rdx
movq _ZL5rrDPb(%rip), %rax
movsd _ZL1D(%rip), %xmm0
cmpq %rdx, %rax
movsd %xmm0, 8(%rsp)
je .L18
+ movsd 8(%rsp), %xmm0
.p2align 4,,10
.p2align 3
.L24:
- movsd 8(%rsp), %xmm0
addsd (%rax), %xmm0
addq $8, %rax
cmpq %rax, %rdx
- movsd %xmm0, 8(%rsp)
jne .L24
+ movsd %xmm0, 8(%rsp)
.L18:
movsd 8(%rsp), %xmm0
ucomisd .LC2(%rip), %xmm0
jp .L23
jne .L23
addq $24, %rsp
In lreg dump we have:
(code_label:HI 98 35 97 7 24 "" [1 uses])
(note:HI 97 98 45 7 [bb 7] NOTE_INSN_BASIC_BLOCK)
(insn:HI 45 97 46 7 pr23305.ii:28564 (set (reg/v:DF 64 [ result ])
(plus:DF (reg/v:DF 64 [ result ])
(mem/s:DF (reg:DI 58 [ ivtmp.254 ]) [29 <variable>.value+0 S8
A8]))) 680 {*fop_df_comm_sse} (nil))
(insn:HI 46 45 48 7 pr23305.ii:28564 (parallel [
(set (reg:DI 58 [ ivtmp.254 ])
(plus:DI (reg:DI 58 [ ivtmp.254 ])
(const_int 8 [0x8])))
(clobber (reg:CC 17 flags))
]) 244 {*adddi_1_rex64} (expr_list:REG_UNUSED (reg:CC 17 flags)
(nil)))
(insn:HI 48 46 49 7 pr23305.ii:28673 (set (reg:CCZ 17 flags)
(compare:CCZ (reg/f:DI 60 [ last$current$current$current ])
(reg:DI 58 [ ivtmp.254 ]))) 2 {cmpdi_1_insn_rex64} (nil))
(jump_insn:HI 49 48 50 7 pr23305.ii:28673 (set (pc)
(if_then_else (ne (reg:CCZ 17 flags)
(const_int 0 [0x0]))
(label_ref:DI 98)
(pc))) 579 {*jcc_1} (expr_list:REG_DEAD (reg:CCZ 17 flags)
(expr_list:REG_BR_PROB (const_int 9100 [0x238c])
(nil))))
and
Register 64 pref SSE_FIRST_REG, else SSE_REGS
Register 64 used 5 times across 23 insns; set 2 times; user var; crosses 3
calls; pref SSE_FIRST_REG, else SSE_REGS.
Yet global alloc puts it into 8(%rsp), which is certainly fine, except in the
tight loop.
--
jakub at gcc dot gnu dot org changed:
What |Removed |Added
----------------------------------------------------------------------------
CC| |vmakarov at gcc dot gnu dot
| |org
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=23305