https://gcc.gnu.org/bugzilla/show_bug.cgi?id=86752
Bug ID: 86752 Summary: FAIL: gcc.target/i386/avx2-cvt-2.c Product: gcc Version: 9.0 Status: UNCONFIRMED Severity: normal Priority: P3 Component: target Assignee: unassigned at gcc dot gnu.org Reporter: hjl.tools at gmail dot com CC: ubizjak at gmail dot com, wei3.xiao at intel dot com Target Milestone: --- Target: i386,x86-64 For --- #define N 16 float f[N]; double d[N]; int n[N]; __attribute__((noinline)) void f3 (void) { int i; for (i = 0; i < N; i++) d[i] = f[i]; } --- r263067 improved -O3 -mavx2 -mtune=generic -m64 from .cfi_startproc vmovaps f(%rip), %xmm2 vmovaps f+32(%rip), %xmm3 vinsertf128 $0x1, f+16(%rip), %ymm2, %ymm0 vcvtps2pd %xmm0, %ymm1 vextractf128 $0x1, %ymm0, %xmm0 vmovaps %xmm1, d(%rip) vextractf128 $0x1, %ymm1, d+16(%rip) vcvtps2pd %xmm0, %ymm0 vmovaps %xmm0, d+32(%rip) vextractf128 $0x1, %ymm0, d+48(%rip) vinsertf128 $0x1, f+48(%rip), %ymm3, %ymm0 vcvtps2pd %xmm0, %ymm1 vextractf128 $0x1, %ymm0, %xmm0 vmovaps %xmm1, d+64(%rip) vextractf128 $0x1, %ymm1, d+80(%rip) vcvtps2pd %xmm0, %ymm0 vmovaps %xmm0, d+96(%rip) vextractf128 $0x1, %ymm0, d+112(%rip) vzeroupper ret .cfi_endproc to .cfi_startproc vcvtps2pd f(%rip), %ymm0 vmovaps %xmm0, d(%rip) vextractf128 $0x1, %ymm0, d+16(%rip) vcvtps2pd f+16(%rip), %ymm0 vmovaps %xmm0, d+32(%rip) vextractf128 $0x1, %ymm0, d+48(%rip) vcvtps2pd f+32(%rip), %ymm0 vextractf128 $0x1, %ymm0, d+80(%rip) vmovaps %xmm0, d+64(%rip) vcvtps2pd f+48(%rip), %ymm0 vextractf128 $0x1, %ymm0, d+112(%rip) vmovaps %xmm0, d+96(%rip) vzeroupper ret .cfi_endproc This led to FAIL: gcc.target/i386/avx2-cvt-2.c scan-assembler vcvtps2pd[^\n\r]*(%xmm[^\n\r]*%ymm|ymm[^\n\r]*xmm) FAIL: gcc.target/i386/avx-cvt-2.c scan-assembler vcvtps2pd[^\n\r]*(%xmm[^\n\r]*%ymm|ymm[^\n\r]*xmm) For -m32, we generate: .cfi_startproc vmovups f, %xmm2 vinsertf128 $0x1, f+16, %ymm2, %ymm0 vmovups f+32, %xmm3 vextractf128 $0x1, %ymm0, %xmm0 vcvtps2pd %xmm2, %ymm1 vmovups %xmm1, d vextractf128 $0x1, %ymm1, d+16 vcvtps2pd %xmm0, %ymm0 vcvtps2pd %xmm3, %ymm1 
vmovups %xmm0, d+32 vextractf128 $0x1, %ymm0, d+48 vinsertf128 $0x1, f+48, %ymm3, %ymm0 vmovups %xmm1, d+64 vextractf128 $0x1, %ymm1, d+80 vextractf128 $0x1, %ymm0, %xmm0 vcvtps2pd %xmm0, %ymm0 vmovups %xmm0, d+96 vextractf128 $0x1, %ymm0, d+112 vzeroupper ret .cfi_endproc which is caused by different tuning options for -mtune=generic -m32.