https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102178
--- Comment #33 from Richard Biener <rguenth at gcc dot gnu.org> ---
(In reply to Richard Biener from comment #32)
> The diff with ! added is quite short, I have yet to measure any
> effect on LBM:
>
> --- streamcollide.s.orig 2022-04-25 11:37:01.638733951 +0200
> +++ streamcollide.s2 2022-04-25 11:35:54.885849296 +0200
> @@ -33,28 +33,24 @@
>         .p2align 4
>         .p2align 3
>  .L12:
> -       movq    .LC0(%rip), %rax
> -       vmovsd  .LC4(%rip), %xmm6
> +       vmovsd  .LC0(%rip), %xmm2
> +       vmovsd  .LC1(%rip), %xmm13
> +       movabsq $0x3ff01878b7a1c25d, %rax
>         movabsq $0x3fef85af6c69b5a6, %rdi
> +       vmovsd  .LC2(%rip), %xmm12
> +       vmovsd  .LC3(%rip), %xmm14
>         movabsq $0x3ff03db8fde2ef4e, %r8
> +       movabsq $0x3fefcea39c51dabe, %r9
> +       vmovsd  .LC4(%rip), %xmm6
>         vmovsd  .LC5(%rip), %xmm7
>         movq    .LC8(%rip), %r11
> -       movabsq $0x3fefcea39c51dabe, %r9
>         movq    .LC6(%rip), %rdx
>         movq    .LC7(%rip), %rcx
> -       vmovq   %rax, %xmm2
> -       vmovq   %rax, %xmm4
> -       movq    .LC1(%rip), %rax
>         movq    %r11, %rsi
>         movq    %r11, %r12
> -       vmovq   %rax, %xmm13
> -       vmovq   %rax, %xmm8
> -       movq    .LC2(%rip), %rax
> -       vmovq   %rax, %xmm12
> -       vmovq   %rax, %xmm5
> -       movq    .LC3(%rip), %rax
> -       vmovq   %rax, %xmm14
> -       movabsq $0x3ff01878b7a1c25d, %rax
> +       vmovsd  %xmm2, %xmm2, %xmm4
> +       vmovsd  %xmm13, %xmm13, %xmm8
> +       vmovsd  %xmm12, %xmm12, %xmm5
>         vmovsd  %xmm14, -16(%rsp)
>  .L5:
>         vmulsd  .LC9(%rip), %xmm0, %xmm3

Huh, and the net effect is that the + code is 9% _slower_!?