https://gcc.gnu.org/bugzilla/show_bug.cgi?id=57952
mmokrejs at gmail dot com changed: What |Removed |Added ---------------------------------------------------------------------------- CC| |mmokrejs at gmail dot com --- Comment #2 from mmokrejs at gmail dot com --- I see a similar problem: gcc-4.9.6 does not use the 256-bit YMM registers; it only emits SSE instructions that use the 128-bit XMM registers. Try https://people.eecs.berkeley.edu/~samw/research/code/stream/stream.c $ gcc -march=native -o stream stream.c $ objdump -d stream | grep ymm $ In contrast, icc does the job properly: $ icc -qopenmp -O3 -xhost stream.c $ objdump -d a.out | grep ymm 401323: c5 fd 10 00 vmovupd (%rax),%ymm0 40132d: c5 fd 10 48 20 vmovupd 0x20(%rax),%ymm1 401332: c5 7d 10 40 40 vmovupd 0x40(%rax),%ymm8 401337: c5 7d 10 48 60 vmovupd 0x60(%rax),%ymm9 40133c: c5 7d 10 1d 3c 65 00 vmovupd 0x653c(%rip),%ymm11 # 407880 <_IO_stdin_used+0x340> 401344: c5 fd 5c 50 f8 vsubpd -0x8(%rax),%ymm0,%ymm2 401349: c5 f5 5c 58 18 vsubpd 0x18(%rax),%ymm1,%ymm3 40134e: c5 3d 5c 50 38 vsubpd 0x38(%rax),%ymm8,%ymm10 401353: c5 35 5c 60 58 vsubpd 0x58(%rax),%ymm9,%ymm12 401358: c5 a5 59 e2 vmulpd %ymm2,%ymm11,%ymm4 401361: c5 a5 59 f3 vmulpd %ymm3,%ymm11,%ymm6 401365: c4 41 25 59 ea vmulpd %ymm10,%ymm11,%ymm13 40136a: c4 41 25 59 fc vmulpd %ymm12,%ymm11,%ymm15 40136f: c5 fd e6 ec vcvttpd2dq %ymm4,%xmm5 401373: c5 fd e6 fe vcvttpd2dq %ymm6,%xmm7 401377: c4 41 7d e6 f5 vcvttpd2dq %ymm13,%xmm14 40137c: c5 f5 ef c9 vpxor %ymm1,%ymm1,%ymm1 401380: c4 41 7d e6 ef vcvttpd2dq %ymm15,%xmm13 401385: c4 e2 7d 58 e2 vpbroadcastd %xmm2,%ymm4 40139c: c4 e3 55 38 c7 01 vinserti128 $0x1,%xmm7,%ymm5,%ymm0 4013a2: c4 e2 7d 3d d9 vpmaxsd %ymm1,%ymm0,%ymm3 4013a7: c4 e2 65 39 f4 vpminsd %ymm4,%ymm3,%ymm6 4013ac: c4 43 0d 38 fd 01 vinserti128 $0x1,%xmm13,%ymm14,%ymm15 4013b2: c4 e2 05 3d e9 vpmaxsd %ymm1,%ymm15,%ymm5 4013b7: c4 e2 55 39 fe vpminsd %ymm6,%ymm5,%ymm7 4013bc: c4 c3 7d 39 f8 01 vextracti128 $0x1,%ymm7,%xmm8 401caa: c5 fd 57 c0 vxorpd %ymm0,%ymm0,%ymm0 401cc8: c5 fd 10 16 vmovupd 
(%rsi),%ymm2 401cd0: c5 fd 10 5e 20 vmovupd 0x20(%rsi),%ymm3 401cd5: c5 fd 10 66 40 vmovupd 0x40(%rsi),%ymm4 401cda: c5 fd 10 76 60 vmovupd 0x60(%rsi),%ymm6 401ce7: c4 e2 ed b8 0f vfmadd231pd (%rdi),%ymm2,%ymm1 401cec: c4 e2 e5 b8 47 20 vfmadd231pd 0x20(%rdi),%ymm3,%ymm0 401cf2: c5 dd 59 6f 40 vmulpd 0x40(%rdi),%ymm4,%ymm5 401cf7: c5 cd 59 7f 60 vmulpd 0x60(%rdi),%ymm6,%ymm7 401cfc: c5 d5 58 c9 vaddpd %ymm1,%ymm5,%ymm1 401d00: c5 c5 58 c0 vaddpd %ymm0,%ymm7,%ymm0 401d0d: c5 f5 58 c0 vaddpd %ymm0,%ymm1,%ymm0 401d11: c4 e3 7d 19 c1 01 vextractf128 $0x1,%ymm0,%xmm1 401d6a: c5 fd 10 8c c3 c0 be vmovupd 0x60bec0(%rbx,%rax,8),%ymm1 401d73: c5 f5 59 94 c3 c0 60 vmulpd 0x268660c0(%rbx,%rax,8),%ymm1,%ymm2 401d7c: c5 ed 58 c0 vaddpd %ymm0,%ymm2,%ymm0 401d85: c4 e3 7d 19 c1 01 vextractf128 $0x1,%ymm0,%xmm1 401f9b: c4 e2 7d 19 44 24 10 vbroadcastsd 0x10(%rsp),%ymm0 401fa2: c5 fd 10 8c d1 c0 be vmovupd 0x60bec0(%rcx,%rdx,8),%ymm1 401fab: c4 e2 fd a8 8c d1 c0 vfmadd213pd 0x268660c0(%rcx,%rdx,8),%ymm0,%ymm1 401fb5: c5 fd 2b 8c d1 c0 02 vmovntpd %ymm1,0x4cac02c0(%rcx,%rdx,8) 40213b: c5 fd 10 84 d1 c0 60 vmovupd 0x268660c0(%rcx,%rdx,8),%ymm0 402144: c5 fd 58 8c d1 c0 be vaddpd 0x60bec0(%rcx,%rdx,8),%ymm0,%ymm1 40214d: c5 fd 2b 8c d1 c0 02 vmovntpd %ymm1,0x4cac02c0(%rcx,%rdx,8) 4022dd: c4 e2 7d 19 44 24 10 vbroadcastsd 0x10(%rsp),%ymm0 4022e4: c5 fd 2b 84 d1 c0 02 vmovntpd %ymm0,0x4cac02c0(%rcx,%rdx,8) 4025c2: c5 fd 10 05 76 52 00 vmovupd 0x5276(%rip),%ymm0 # 407840 <_IO_stdin_used+0x300> 4025de: c5 fd 59 0e vmulpd (%rsi),%ymm0,%ymm1 4025e2: c5 fd 11 0e vmovupd %ymm1,(%rsi) 402759: c5 fd 10 15 ff 50 00 vmovupd 0x50ff(%rip),%ymm2 # 407860 <_IO_stdin_used+0x320> 402761: c5 fd 10 0d d7 50 00 vmovupd 0x50d7(%rip),%ymm1 # 407840 <_IO_stdin_used+0x300> 402769: c5 fd 57 c0 vxorpd %ymm0,%ymm0,%ymm0 40276d: c5 fd 2b 94 f8 c0 60 vmovntpd %ymm2,0x268660c0(%rax,%rdi,8) 402776: c5 fd 2b 8c f8 c0 be vmovntpd %ymm1,0x60bec0(%rax,%rdi,8) 40277f: c5 fd 2b 84 f8 c0 02 vmovntpd 
%ymm0,0x4cac02c0(%rax,%rdi,8) 4030b0: c5 fd 10 84 c8 c0 60 vmovupd 0x268660c0(%rax,%rcx,8),%ymm0 4030b9: c5 fd 2b 84 c8 c0 02 vmovntpd %ymm0,0x4cac02c0(%rax,%rcx,8) 4032f0: c5 fd 10 44 24 08 vmovupd 0x8(%rsp),%ymm0 4032fb: c5 fd 10 4c 24 28 vmovupd 0x28(%rsp),%ymm1 403301: c5 7d 10 44 24 48 vmovupd 0x48(%rsp),%ymm8 403307: c5 7d 10 4c 24 68 vmovupd 0x68(%rsp),%ymm9 40330d: c5 7d 10 1d 6b 45 00 vmovupd 0x456b(%rip),%ymm11 # 407880 <_IO_stdin_used+0x340> 403315: c5 fd 5c 14 24 vsubpd (%rsp),%ymm0,%ymm2 40331a: c5 f5 5c 5c 24 20 vsubpd 0x20(%rsp),%ymm1,%ymm3 403320: c5 3d 5c 54 24 40 vsubpd 0x40(%rsp),%ymm8,%ymm10 403326: c5 35 5c 64 24 60 vsubpd 0x60(%rsp),%ymm9,%ymm12 40332c: c5 a5 59 e2 vmulpd %ymm2,%ymm11,%ymm4 403334: c5 a5 59 f3 vmulpd %ymm3,%ymm11,%ymm6 403338: c4 41 25 59 ea vmulpd %ymm10,%ymm11,%ymm13 40333d: c4 41 25 59 fc vmulpd %ymm12,%ymm11,%ymm15 403342: c5 fd e6 ec vcvttpd2dq %ymm4,%xmm5 403346: c5 fd e6 fe vcvttpd2dq %ymm6,%xmm7 40334a: c4 41 7d e6 f5 vcvttpd2dq %ymm13,%xmm14 40334f: c5 f5 ef c9 vpxor %ymm1,%ymm1,%ymm1 403353: c4 41 7d e6 ef vcvttpd2dq %ymm15,%xmm13 403358: c4 e2 7d 58 e2 vpbroadcastd %xmm2,%ymm4 403368: c4 e3 55 38 c7 01 vinserti128 $0x1,%xmm7,%ymm5,%ymm0 40336e: c4 e2 7d 3d d9 vpmaxsd %ymm1,%ymm0,%ymm3 403373: c4 e2 65 39 f4 vpminsd %ymm4,%ymm3,%ymm6 403378: c4 43 0d 38 fd 01 vinserti128 $0x1,%xmm13,%ymm14,%ymm15 40337e: c4 e2 05 3d e9 vpmaxsd %ymm1,%ymm15,%ymm5 403383: c4 e2 55 39 fe vpminsd %ymm6,%ymm5,%ymm7 403388: c4 c3 7d 39 f8 01 vextracti128 $0x1,%ymm7,%xmm8 403694: c4 c1 7d 10 84 c0 c0 vmovupd 0x268660c0(%r8,%rax,8),%ymm0 40369e: c4 c1 7d 2b 84 c0 c0 vmovntpd %ymm0,0x4cac02c0(%r8,%rax,8) 4038ba: c5 fd 10 84 d1 c0 60 vmovupd 0x268660c0(%rcx,%rdx,8),%ymm0 4038c3: c5 fd 58 8c d1 c0 be vaddpd 0x60bec0(%rcx,%rdx,8),%ymm0,%ymm1 4038cc: c5 fd 2b 8c d1 c0 02 vmovntpd %ymm1,0x4cac02c0(%rcx,%rdx,8) 403b4a: c4 e2 7d 19 c1 vbroadcastsd %xmm1,%ymm0 403b4f: c5 fd 10 94 d1 c0 be vmovupd 0x60bec0(%rcx,%rdx,8),%ymm2 403b58: c4 e2 fd a8 94 d1 
c0 vfmadd213pd 0x268660c0(%rcx,%rdx,8),%ymm0,%ymm2 403b62: c5 fd 2b 94 d1 c0 02 vmovntpd %ymm2,0x4cac02c0(%rcx,%rdx,8) 403e2e: c5 fd 57 c0 vxorpd %ymm0,%ymm0,%ymm0 403e40: c5 fd 10 14 dd c0 be vmovupd 0x60bec0(,%rbx,8),%ymm2 403e4d: c5 fd 10 1c dd e0 be vmovupd 0x60bee0(,%rbx,8),%ymm3 403e56: c5 fd 10 24 dd 00 bf vmovupd 0x60bf00(,%rbx,8),%ymm4 403e5f: c5 fd 10 34 dd 20 bf vmovupd 0x60bf20(,%rbx,8),%ymm6 403e68: c4 e2 ed b8 0c dd c0 vfmadd231pd 0x268660c0(,%rbx,8),%ymm2,%ymm1 403e72: c4 e2 e5 b8 04 dd e0 vfmadd231pd 0x268660e0(,%rbx,8),%ymm3,%ymm0 403e7c: c5 dd 59 2c dd 00 61 vmulpd 0x26866100(,%rbx,8),%ymm4,%ymm5 403e85: c5 cd 59 3c dd 20 61 vmulpd 0x26866120(,%rbx,8),%ymm6,%ymm7 403e8e: c5 d5 58 c9 vaddpd %ymm1,%ymm5,%ymm1 403e92: c5 c5 58 c0 vaddpd %ymm0,%ymm7,%ymm0 403e9f: c5 f5 58 c0 vaddpd %ymm0,%ymm1,%ymm0 403ea3: c4 e3 7d 19 c1 01 vextractf128 $0x1,%ymm0,%xmm1 403efb: c4 c1 7d 10 8c c1 c0 vmovupd 0x60bec0(%r9,%rax,8),%ymm1 403f05: c4 c1 75 59 94 c1 c0 vmulpd 0x268660c0(%r9,%rax,8),%ymm1,%ymm2 403f0f: c5 ed 58 c0 vaddpd %ymm0,%ymm2,%ymm0 403f18: c4 e3 7d 19 c1 01 vextractf128 $0x1,%ymm0,%xmm1 404216: c4 e2 7d 19 c1 vbroadcastsd %xmm1,%ymm0 40421b: c5 fd 2b 84 d1 c0 02 vmovntpd %ymm0,0x4cac02c0(%rcx,%rdx,8) 404690: c5 fe 6f 0e vmovdqu (%rsi),%ymm1 404694: c5 fe 6f 56 20 vmovdqu 0x20(%rsi),%ymm2 404699: c5 fe 6f 5e 40 vmovdqu 0x40(%rsi),%ymm3 40469e: c5 fe 6f 66 60 vmovdqu 0x60(%rsi),%ymm4 4046a3: c5 fe 6f ae 80 00 00 vmovdqu 0x80(%rsi),%ymm5 4046ab: c5 fe 6f b6 a0 00 00 vmovdqu 0xa0(%rsi),%ymm6 4046b3: c5 fe 6f be c0 00 00 vmovdqu 0xc0(%rsi),%ymm7 4046bb: c5 7e 6f 86 e0 00 00 vmovdqu 0xe0(%rsi),%ymm8 4046c3: c5 fd 7f 0f vmovdqa %ymm1,(%rdi) 4046c7: c5 fd 7f 57 20 vmovdqa %ymm2,0x20(%rdi) 4046cc: c5 fd 7f 5f 40 vmovdqa %ymm3,0x40(%rdi) 4046d1: c5 fd 7f 67 60 vmovdqa %ymm4,0x60(%rdi) 4046d6: c5 fd 7f af 80 00 00 vmovdqa %ymm5,0x80(%rdi) 4046de: c5 fd 7f b7 a0 00 00 vmovdqa %ymm6,0xa0(%rdi) 4046e6: c5 fd 7f bf c0 00 00 vmovdqa %ymm7,0xc0(%rdi) 4046ee: c5 7d 7f 
87 e0 00 00 vmovdqa %ymm8,0xe0(%rdi) 40475c: c5 fe 6f 0e vmovdqu (%rsi),%ymm1 404760: c5 fe 6f 56 20 vmovdqu 0x20(%rsi),%ymm2 404765: c5 fe 6f 5e 40 vmovdqu 0x40(%rsi),%ymm3 40476a: c5 fe 6f 66 60 vmovdqu 0x60(%rsi),%ymm4 40476f: c5 fe 6f ae 80 00 00 vmovdqu 0x80(%rsi),%ymm5 404777: c5 fe 6f b6 a0 00 00 vmovdqu 0xa0(%rsi),%ymm6 40477f: c5 fe 6f be c0 00 00 vmovdqu 0xc0(%rsi),%ymm7 404787: c5 7e 6f 86 e0 00 00 vmovdqu 0xe0(%rsi),%ymm8 404796: c5 fd e7 0f vmovntdq %ymm1,(%rdi) 40479a: c5 fd e7 57 20 vmovntdq %ymm2,0x20(%rdi) 40479f: c5 fd e7 5f 40 vmovntdq %ymm3,0x40(%rdi) 4047a4: c5 fd e7 67 60 vmovntdq %ymm4,0x60(%rdi) 4047a9: c5 fd e7 af 80 00 00 vmovntdq %ymm5,0x80(%rdi) 4047b1: c5 fd e7 b7 a0 00 00 vmovntdq %ymm6,0xa0(%rdi) 4047b9: c5 fd e7 bf c0 00 00 vmovntdq %ymm7,0xc0(%rdi) 4047c1: c5 7d e7 87 e0 00 00 vmovntdq %ymm8,0xe0(%rdi) 4048f0: c5 fc 10 86 20 ff ff vmovups -0xe0(%rsi),%ymm0 4048f8: c5 fc 29 87 20 ff ff vmovaps %ymm0,-0xe0(%rdi) 404900: c5 fc 10 86 40 ff ff vmovups -0xc0(%rsi),%ymm0 404908: c5 fc 29 87 40 ff ff vmovaps %ymm0,-0xc0(%rdi) 404910: c5 fc 10 86 60 ff ff vmovups -0xa0(%rsi),%ymm0 404918: c5 fc 29 87 60 ff ff vmovaps %ymm0,-0xa0(%rdi) 404920: c5 fc 10 46 80 vmovups -0x80(%rsi),%ymm0 404925: c5 fc 29 47 80 vmovaps %ymm0,-0x80(%rdi) 40492a: c5 fc 10 46 a0 vmovups -0x60(%rsi),%ymm0 40492f: c5 fc 29 47 a0 vmovaps %ymm0,-0x60(%rdi) 404934: c5 fc 10 46 c0 vmovups -0x40(%rsi),%ymm0 404939: c5 fc 29 47 c0 vmovaps %ymm0,-0x40(%rdi) 40493e: c5 fc 10 46 e0 vmovups -0x20(%rsi),%ymm0 404943: c5 fc 29 47 e0 vmovaps %ymm0,-0x20(%rdi) 404a40: c5 fc 10 06 vmovups (%rsi),%ymm0 404a44: c5 fc 11 07 vmovups %ymm0,(%rdi) 404a48: c5 fc 10 44 0e e0 vmovups -0x20(%rsi,%rcx,1),%ymm0 404a4e: c5 fc 11 44 0f e0 vmovups %ymm0,-0x20(%rdi,%rcx,1) 404a60: c5 fc 10 06 vmovups (%rsi),%ymm0 404a64: c5 fc 11 07 vmovups %ymm0,(%rdi) 404a68: c5 fc 10 46 20 vmovups 0x20(%rsi),%ymm0 404a6d: c5 fc 11 47 20 vmovups %ymm0,0x20(%rdi) 404a72: c5 fc 10 44 0e e0 vmovups 
-0x20(%rsi,%rcx,1),%ymm0 404a78: c5 fc 11 44 0f e0 vmovups %ymm0,-0x20(%rdi,%rcx,1) 404a90: c5 fc 10 06 vmovups (%rsi),%ymm0 404a94: c5 fc 11 07 vmovups %ymm0,(%rdi) 404a98: c5 fc 10 46 20 vmovups 0x20(%rsi),%ymm0 404a9d: c5 fc 11 47 20 vmovups %ymm0,0x20(%rdi) 404aa2: c5 fc 10 46 40 vmovups 0x40(%rsi),%ymm0 404aa7: c5 fc 11 47 40 vmovups %ymm0,0x40(%rdi) 404aac: c5 fc 10 44 0e e0 vmovups -0x20(%rsi,%rcx,1),%ymm0 404ab2: c5 fc 11 44 0f e0 vmovups %ymm0,-0x20(%rdi,%rcx,1) 404ac0: c5 fc 10 06 vmovups (%rsi),%ymm0 404ac4: c5 fc 11 07 vmovups %ymm0,(%rdi) 404ac8: c5 fc 10 46 20 vmovups 0x20(%rsi),%ymm0 404acd: c5 fc 11 47 20 vmovups %ymm0,0x20(%rdi) 404ad2: c5 fc 10 46 40 vmovups 0x40(%rsi),%ymm0 404ad7: c5 fc 11 47 40 vmovups %ymm0,0x40(%rdi) 404adc: c5 fc 10 46 60 vmovups 0x60(%rsi),%ymm0 404ae1: c5 fc 11 47 60 vmovups %ymm0,0x60(%rdi) 404ae6: c5 fc 10 44 0e e0 vmovups -0x20(%rsi,%rcx,1),%ymm0 404aec: c5 fc 11 44 0f e0 vmovups %ymm0,-0x20(%rdi,%rcx,1) 404b00: c5 fc 10 06 vmovups (%rsi),%ymm0 404b04: c5 fc 11 07 vmovups %ymm0,(%rdi) 404b08: c5 fc 10 46 20 vmovups 0x20(%rsi),%ymm0 404b0d: c5 fc 11 47 20 vmovups %ymm0,0x20(%rdi) 404b12: c5 fc 10 46 40 vmovups 0x40(%rsi),%ymm0 404b17: c5 fc 11 47 40 vmovups %ymm0,0x40(%rdi) 404b1c: c5 fc 10 46 60 vmovups 0x60(%rsi),%ymm0 404b21: c5 fc 11 47 60 vmovups %ymm0,0x60(%rdi) 404b26: c5 fc 10 86 80 00 00 vmovups 0x80(%rsi),%ymm0 404b2e: c5 fc 11 87 80 00 00 vmovups %ymm0,0x80(%rdi) 404b36: c5 fc 10 44 0e e0 vmovups -0x20(%rsi,%rcx,1),%ymm0 404b3c: c5 fc 11 44 0f e0 vmovups %ymm0,-0x20(%rdi,%rcx,1) 404b50: c5 fc 10 06 vmovups (%rsi),%ymm0 404b54: c5 fc 11 07 vmovups %ymm0,(%rdi) 404b58: c5 fc 10 46 20 vmovups 0x20(%rsi),%ymm0 404b5d: c5 fc 11 47 20 vmovups %ymm0,0x20(%rdi) 404b62: c5 fc 10 46 40 vmovups 0x40(%rsi),%ymm0 404b67: c5 fc 11 47 40 vmovups %ymm0,0x40(%rdi) 404b6c: c5 fc 10 46 60 vmovups 0x60(%rsi),%ymm0 404b71: c5 fc 11 47 60 vmovups %ymm0,0x60(%rdi) 404b76: c5 fc 10 86 80 00 00 vmovups 0x80(%rsi),%ymm0 404b7e: c5 fc 
11 87 80 00 00 vmovups %ymm0,0x80(%rdi) 404b86: c5 fc 10 86 a0 00 00 vmovups 0xa0(%rsi),%ymm0 404b8e: c5 fc 11 87 a0 00 00 vmovups %ymm0,0xa0(%rdi) 404b96: c5 fc 10 44 0e e0 vmovups -0x20(%rsi,%rcx,1),%ymm0 404b9c: c5 fc 11 44 0f e0 vmovups %ymm0,-0x20(%rdi,%rcx,1) 404bb0: c5 fc 10 06 vmovups (%rsi),%ymm0 404bb4: c5 fc 11 07 vmovups %ymm0,(%rdi) 404bb8: c5 fc 10 46 20 vmovups 0x20(%rsi),%ymm0 404bbd: c5 fc 11 47 20 vmovups %ymm0,0x20(%rdi) 404bc2: c5 fc 10 46 40 vmovups 0x40(%rsi),%ymm0 404bc7: c5 fc 11 47 40 vmovups %ymm0,0x40(%rdi) 404bcc: c5 fc 10 46 60 vmovups 0x60(%rsi),%ymm0 404bd1: c5 fc 11 47 60 vmovups %ymm0,0x60(%rdi) 404bd6: c5 fc 10 86 80 00 00 vmovups 0x80(%rsi),%ymm0 404bde: c5 fc 11 87 80 00 00 vmovups %ymm0,0x80(%rdi) 404be6: c5 fc 10 86 a0 00 00 vmovups 0xa0(%rsi),%ymm0 404bee: c5 fc 11 87 a0 00 00 vmovups %ymm0,0xa0(%rdi) 404bf6: c5 fc 10 86 c0 00 00 vmovups 0xc0(%rsi),%ymm0 404bfe: c5 fc 11 87 c0 00 00 vmovups %ymm0,0xc0(%rdi) 404c06: c5 fc 10 44 0e e0 vmovups -0x20(%rsi,%rcx,1),%ymm0 404c0c: c5 fc 11 44 0f e0 vmovups %ymm0,-0x20(%rdi,%rcx,1) 404c20: c5 fc 10 84 0e 00 ff vmovups -0x100(%rsi,%rcx,1),%ymm0 404c29: c5 fc 11 84 0f 00 ff vmovups %ymm0,-0x100(%rdi,%rcx,1) 404c32: c5 fc 10 84 0e 20 ff vmovups -0xe0(%rsi,%rcx,1),%ymm0 404c3b: c5 fc 11 84 0f 20 ff vmovups %ymm0,-0xe0(%rdi,%rcx,1) 404c44: c5 fc 10 84 0e 40 ff vmovups -0xc0(%rsi,%rcx,1),%ymm0 404c4d: c5 fc 11 84 0f 40 ff vmovups %ymm0,-0xc0(%rdi,%rcx,1) 404c56: c5 fc 10 84 0e 60 ff vmovups -0xa0(%rsi,%rcx,1),%ymm0 404c5f: c5 fc 11 84 0f 60 ff vmovups %ymm0,-0xa0(%rdi,%rcx,1) 404c68: c5 fc 10 44 0e 80 vmovups -0x80(%rsi,%rcx,1),%ymm0 404c6e: c5 fc 11 44 0f 80 vmovups %ymm0,-0x80(%rdi,%rcx,1) 404c74: c5 fc 10 44 0e a0 vmovups -0x60(%rsi,%rcx,1),%ymm0 404c7a: c5 fc 11 44 0f a0 vmovups %ymm0,-0x60(%rdi,%rcx,1) 404c80: c5 fc 10 44 0e c0 vmovups -0x40(%rsi,%rcx,1),%ymm0 404c86: c5 fc 11 44 0f c0 vmovups %ymm0,-0x40(%rdi,%rcx,1) 404c8c: c5 fc 10 44 0e e0 vmovups -0x20(%rsi,%rcx,1),%ymm0 404c92: c5 
fc 11 44 0f e0 vmovups %ymm0,-0x20(%rdi,%rcx,1) $