https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99395
--- Comment #3 from Jan Hubicka <hubicka at gcc dot gnu.org> ---
The ICC version seems to run faster:

000000000040a050 <s116>:
  40a050:  55                    push   %rbp
  40a051:  48 89 e5              mov    %rsp,%rbp
  40a054:  48 83 e4 e0           and    $0xffffffffffffffe0,%rsp
  40a058:  41 57                 push   %r15
  40a05a:  53                    push   %rbx
  40a05b:  48 83 ec 10           sub    $0x10,%rsp
  40a05f:  48 89 fb              mov    %rdi,%rbx
  40a062:  bf 74 f5 42 00        mov    $0x42f574,%edi
  40a067:  e8 14 cc 00 00        call   416c80 <initialise_arrays>
  40a06c:  48 89 df              mov    %rbx,%rdi
  40a06f:  33 f6                 xor    %esi,%esi
  40a071:  e8 4a 70 ff ff        call   4010c0 <gettimeofday@plt>
  40a076:  33 c0                 xor    %eax,%eax
  40a078:  41 89 c7              mov    %eax,%r15d
  40a07b:  33 d2                 xor    %edx,%edx
  40a07d:  0f 1f 00              nopl   (%rax)
  40a080:  c5 fc 10 04 95 04 9d  vmovups 0x579d04(,%rdx,4),%ymm0
  40a087:  57 00
  40a089:  c5 fc 10 14 95 24 9d  vmovups 0x579d24(,%rdx,4),%ymm2
  40a090:  57 00
  40a092:  c5 fc 10 24 95 44 9d  vmovups 0x579d44(,%rdx,4),%ymm4
  40a099:  57 00
  40a09b:  c5 fc 10 34 95 64 9d  vmovups 0x579d64(,%rdx,4),%ymm6
  40a0a2:  57 00
  40a0a4:  c5 fc 59 0c 95 00 9d  vmulps 0x579d00(,%rdx,4),%ymm0,%ymm1
  40a0ab:  57 00
  40a0ad:  c5 ec 59 1c 95 20 9d  vmulps 0x579d20(,%rdx,4),%ymm2,%ymm3
  40a0b4:  57 00
  40a0b6:  c5 dc 59 2c 95 40 9d  vmulps 0x579d40(,%rdx,4),%ymm4,%ymm5
  40a0bd:  57 00
  40a0bf:  c5 cc 59 3c 95 60 9d  vmulps 0x579d60(,%rdx,4),%ymm6,%ymm7
  40a0c6:  57 00
  40a0c8:  c5 fc 11 0c 95 00 9d  vmovups %ymm1,0x579d00(,%rdx,4)
  40a0cf:  57 00
  40a0d1:  c5 fc 11 1c 95 20 9d  vmovups %ymm3,0x579d20(,%rdx,4)
  40a0d8:  57 00
  40a0da:  c5 fc 11 2c 95 40 9d  vmovups %ymm5,0x579d40(,%rdx,4)
  40a0e1:  57 00
  40a0e3:  c5 fc 11 3c 95 60 9d  vmovups %ymm7,0x579d60(,%rdx,4)
  40a0ea:  57 00
  40a0ec:  48 83 c2 20           add    $0x20,%rdx
  40a0f0:  48 81 fa e0 7c 00 00  cmp    $0x7ce0,%rdx
  40a0f7:  72 87                 jb     40a080 <s116+0x30>
  40a0f9:  33 c9                 xor    %ecx,%ecx
  40a0fb:  ba e1 7c 00 00        mov    $0x7ce1,%edx
  40a100:  c5 fc 10 04 95 00 9d  vmovups 0x579d00(,%rdx,4),%ymm0
  40a107:  57 00
  40a109:  48 83 c2 08           add    $0x8,%rdx
  40a10d:  c5 fc 59 0c 8d 80 90  vmulps 0x599080(,%rcx,4),%ymm0,%ymm1
  40a114:  59 00
  40a116:  c5 fc 11 0c 8d 80 90  vmovups %ymm1,0x599080(,%rcx,4)
  40a11d:  59 00
  40a11f:  48 83 c1 08           add    $0x8,%rcx
  40a123:  48 83 f9 18           cmp    $0x18,%rcx
  40a127:  72 d7                 jb     40a100 <s116+0xb0>
  40a129:  c5 fa 10 0d b3 ef 18  vmovss 0x18efb3(%rip),%xmm1        # 5990e4 <a+0x1f3e4>
  40a130:  00
  40a131:  bf 00 9d 57 00        mov    $0x579d00,%edi
  40a136:  c5 fa 10 1d aa ef 18  vmovss 0x18efaa(%rip),%xmm3        # 5990e8 <a+0x1f3e8>
  40a13d:  00
  40a13e:  be 80 d8 45 00        mov    $0x45d880,%esi
  40a143:  c5 f2 59 05 95 ef 18  vmulss 0x18ef95(%rip),%xmm1,%xmm0        # 5990e0 <a+0x1f3e0>
  40a14a:  00
  40a14b:  ba 00 a9 55 00        mov    $0x55a900,%edx
  40a150:  c5 e2 59 25 94 ef 18  vmulss 0x18ef94(%rip),%xmm3,%xmm4        # 5990ec <a+0x1f3ec>
  40a157:  00
  40a158:  c5 f2 59 d3           vmulss %xmm3,%xmm1,%xmm2
  40a15c:  c5 fa 11 05 7c ef 18  vmovss %xmm0,0x18ef7c(%rip)        # 5990e0 <a+0x1f3e0>
  40a163:  00
  40a164:  b9 80 e4 43 00        mov    $0x43e480,%ecx
  40a169:  c5 fa 11 15 73 ef 18  vmovss %xmm2,0x18ef73(%rip)        # 5990e4 <a+0x1f3e4>
  40a170:  00
  40a171:  41 b8 00 b5 53 00     mov    $0x53b500,%r8d
  40a177:  c5 fa 11 25 69 ef 18  vmovss %xmm4,0x18ef69(%rip)        # 5990e8 <a+0x1f3e8>
  40a17e:  00
  40a17f:  41 b9 c0 b4 4b 00     mov    $0x4bb4c0,%r9d
  40a185:  68 00 91 59 00        push   $0x599100
  40a18a:  68 00 b5 4f 00        push   $0x4fb500
  40a18f:  c5 f8 77              vzeroupper
  40a192:  c5 f8 57 c0           vxorps %xmm0,%xmm0,%xmm0
  40a196:  e8 d5 92 00 00        call   413470 <dummy>
  40a19b:  48 83 c4 10           add    $0x10,%rsp
  40a19f:  41 ff c7              inc    %r15d
  40a1a2:  41 81 ff 40 42 0f 00  cmp    $0xf4240,%r15d
  40a1a9:  0f 82 cc fe ff ff     jb     40a07b <s116+0x2b>
  40a1af:  48 83 c3 10           add    $0x10,%rbx
  40a1b3:  33 f6                 xor    %esi,%esi
  40a1b5:  48 89 df              mov    %rbx,%rdi
  40a1b8:  e8 03 6f ff ff        call   4010c0 <gettimeofday@plt>
  40a1bd:  bf 74 f5 42 00        mov    $0x42f574,%edi
  40a1c2:  48 83 c4 10           add    $0x10,%rsp
  40a1c6:  5b                    pop    %rbx
  40a1c7:  41 5f                 pop    %r15
  40a1c9:  48 89 ec              mov    %rbp,%rsp
  40a1cc:  5d                    pop    %rbp
  40a1cd:  e9 de 97 00 00        jmp    4139b0 <calc_checksum>
  40a1d2:  0f 1f 80 00 00 00 00  nopl   0x0(%rax)
  40a1d9:  0f 1f 80 00 00 00 00  nopl   0x0(%rax)