https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99395

--- Comment #3 from Jan Hubicka <hubicka at gcc dot gnu.org> ---
The ICC-compiled version seems to run faster. Its disassembly of s116:
000000000040a050 <s116>:
  40a050:       55                      push   %rbp
  40a051:       48 89 e5                mov    %rsp,%rbp
  40a054:       48 83 e4 e0             and    $0xffffffffffffffe0,%rsp
  40a058:       41 57                   push   %r15
  40a05a:       53                      push   %rbx
  40a05b:       48 83 ec 10             sub    $0x10,%rsp
  40a05f:       48 89 fb                mov    %rdi,%rbx
  40a062:       bf 74 f5 42 00          mov    $0x42f574,%edi
  40a067:       e8 14 cc 00 00          call   416c80 <initialise_arrays>
  40a06c:       48 89 df                mov    %rbx,%rdi
  40a06f:       33 f6                   xor    %esi,%esi
  40a071:       e8 4a 70 ff ff          call   4010c0 <gettimeofday@plt>
  40a076:       33 c0                   xor    %eax,%eax
  40a078:       41 89 c7                mov    %eax,%r15d
  40a07b:       33 d2                   xor    %edx,%edx
  40a07d:       0f 1f 00                nopl   (%rax)
  40a080:       c5 fc 10 04 95 04 9d    vmovups 0x579d04(,%rdx,4),%ymm0
  40a087:       57 00 
  40a089:       c5 fc 10 14 95 24 9d    vmovups 0x579d24(,%rdx,4),%ymm2
  40a090:       57 00 
  40a092:       c5 fc 10 24 95 44 9d    vmovups 0x579d44(,%rdx,4),%ymm4
  40a099:       57 00 
  40a09b:       c5 fc 10 34 95 64 9d    vmovups 0x579d64(,%rdx,4),%ymm6
  40a0a2:       57 00 
  40a0a4:       c5 fc 59 0c 95 00 9d    vmulps 0x579d00(,%rdx,4),%ymm0,%ymm1
  40a0ab:       57 00 
  40a0ad:       c5 ec 59 1c 95 20 9d    vmulps 0x579d20(,%rdx,4),%ymm2,%ymm3
  40a0b4:       57 00 
  40a0b6:       c5 dc 59 2c 95 40 9d    vmulps 0x579d40(,%rdx,4),%ymm4,%ymm5
  40a0bd:       57 00 
  40a0bf:       c5 cc 59 3c 95 60 9d    vmulps 0x579d60(,%rdx,4),%ymm6,%ymm7
  40a0c6:       57 00 
  40a0c8:       c5 fc 11 0c 95 00 9d    vmovups %ymm1,0x579d00(,%rdx,4)
  40a0cf:       57 00 
  40a0d1:       c5 fc 11 1c 95 20 9d    vmovups %ymm3,0x579d20(,%rdx,4)
  40a0d8:       57 00 
  40a0da:       c5 fc 11 2c 95 40 9d    vmovups %ymm5,0x579d40(,%rdx,4)
  40a0e1:       57 00 
  40a0e3:       c5 fc 11 3c 95 60 9d    vmovups %ymm7,0x579d60(,%rdx,4)
  40a0ea:       57 00 
  40a0ec:       48 83 c2 20             add    $0x20,%rdx
  40a0f0:       48 81 fa e0 7c 00 00    cmp    $0x7ce0,%rdx
  40a0f7:       72 87                   jb     40a080 <s116+0x30>
  40a0f9:       33 c9                   xor    %ecx,%ecx
  40a0fb:       ba e1 7c 00 00          mov    $0x7ce1,%edx
  40a100:       c5 fc 10 04 95 00 9d    vmovups 0x579d00(,%rdx,4),%ymm0
  40a107:       57 00 
  40a109:       48 83 c2 08             add    $0x8,%rdx
  40a10d:       c5 fc 59 0c 8d 80 90    vmulps 0x599080(,%rcx,4),%ymm0,%ymm1
  40a114:       59 00 
  40a116:       c5 fc 11 0c 8d 80 90    vmovups %ymm1,0x599080(,%rcx,4)
  40a11d:       59 00 
  40a11f:       48 83 c1 08             add    $0x8,%rcx
  40a123:       48 83 f9 18             cmp    $0x18,%rcx
  40a127:       72 d7                   jb     40a100 <s116+0xb0>
  40a129:       c5 fa 10 0d b3 ef 18    vmovss 0x18efb3(%rip),%xmm1        # 5990e4 <a+0x1f3e4>
  40a130:       00 
  40a131:       bf 00 9d 57 00          mov    $0x579d00,%edi
  40a136:       c5 fa 10 1d aa ef 18    vmovss 0x18efaa(%rip),%xmm3        # 5990e8 <a+0x1f3e8>
  40a13d:       00 
  40a13e:       be 80 d8 45 00          mov    $0x45d880,%esi
  40a143:       c5 f2 59 05 95 ef 18    vmulss 0x18ef95(%rip),%xmm1,%xmm0        # 5990e0 <a+0x1f3e0>
  40a14a:       00 
  40a14b:       ba 00 a9 55 00          mov    $0x55a900,%edx
  40a150:       c5 e2 59 25 94 ef 18    vmulss 0x18ef94(%rip),%xmm3,%xmm4        # 5990ec <a+0x1f3ec>
  40a157:       00 
  40a158:       c5 f2 59 d3             vmulss %xmm3,%xmm1,%xmm2
  40a15c:       c5 fa 11 05 7c ef 18    vmovss %xmm0,0x18ef7c(%rip)        # 5990e0 <a+0x1f3e0>
  40a163:       00 
  40a164:       b9 80 e4 43 00          mov    $0x43e480,%ecx
  40a169:       c5 fa 11 15 73 ef 18    vmovss %xmm2,0x18ef73(%rip)        # 5990e4 <a+0x1f3e4>
  40a170:       00 
  40a171:       41 b8 00 b5 53 00       mov    $0x53b500,%r8d
  40a177:       c5 fa 11 25 69 ef 18    vmovss %xmm4,0x18ef69(%rip)        # 5990e8 <a+0x1f3e8>
  40a17e:       00 
  40a17f:       41 b9 c0 b4 4b 00       mov    $0x4bb4c0,%r9d
  40a185:       68 00 91 59 00          push   $0x599100
  40a18a:       68 00 b5 4f 00          push   $0x4fb500
  40a18f:       c5 f8 77                vzeroupper 
  40a192:       c5 f8 57 c0             vxorps %xmm0,%xmm0,%xmm0
  40a196:       e8 d5 92 00 00          call   413470 <dummy>
  40a19b:       48 83 c4 10             add    $0x10,%rsp
  40a19f:       41 ff c7                inc    %r15d
  40a1a2:       41 81 ff 40 42 0f 00    cmp    $0xf4240,%r15d
  40a1a9:       0f 82 cc fe ff ff       jb     40a07b <s116+0x2b>
  40a1af:       48 83 c3 10             add    $0x10,%rbx
  40a1b3:       33 f6                   xor    %esi,%esi
  40a1b5:       48 89 df                mov    %rbx,%rdi
  40a1b8:       e8 03 6f ff ff          call   4010c0 <gettimeofday@plt>
  40a1bd:       bf 74 f5 42 00          mov    $0x42f574,%edi
  40a1c2:       48 83 c4 10             add    $0x10,%rsp
  40a1c6:       5b                      pop    %rbx
  40a1c7:       41 5f                   pop    %r15
  40a1c9:       48 89 ec                mov    %rbp,%rsp
  40a1cc:       5d                      pop    %rbp
  40a1cd:       e9 de 97 00 00          jmp    4139b0 <calc_checksum>
  40a1d2:       0f 1f 80 00 00 00 00    nopl   0x0(%rax)
  40a1d9:       0f 1f 80 00 00 00 00    nopl   0x0(%rax)

Reply via email to