https://gcc.gnu.org/bugzilla/show_bug.cgi?id=95488
--- Comment #9 from Hongtao.liu <crazylht at gmail dot com> --- (In reply to H.J. Lu from comment #8) > -march=skylake-avx512 gave: > > [hjl@gnu-cfl-2 gcc]$ > /export/build/gnu/tools-build/gcc-debug/build-x86_64-linux/gcc/xgcc > -B/export/build/gnu/tools-build/gcc-debug/build-x86_64-linux/gcc/ > /export/gnu/import/git/sources/gcc/gcc/testsuite/gcc.target/i386/avx512bw- > pr95488-1.c -march=skylake-avx512 -fno-diagnostics-show-caret > -fno-diagnostics-show-line-numbers -fdiagnostics-color=never > -fdiagnostics-urls=never -O2 -ffat-lto-objects -fno-ident -S -o > avx512bw-pr95488-1.s > [hjl@gnu-cfl-2 gcc]$ cat avx512bw-pr95488-1.s > .file "avx512bw-pr95488-1.c" > .text > .p2align 4 > .globl mul_512 > .type mul_512, @function > mul_512: > .LFB0: > .cfi_startproc > vpunpcklbw %ymm0, %ymm0, %ymm3 > vpunpcklbw %ymm1, %ymm1, %ymm2 > vpunpckhbw %ymm0, %ymm0, %ymm0 > vpunpckhbw %ymm1, %ymm1, %ymm1 > vpmullw %ymm3, %ymm2, %ymm2 > vpmullw %ymm0, %ymm1, %ymm1 > vpshufb .LC0(%rip), %ymm2, %ymm0 > vpshufb .LC1(%rip), %ymm1, %ymm1 > vpor %ymm1, %ymm0, %ymm0 > ret > .cfi_endproc > .LFE0: > .size mul_512, .-mul_512 > .p2align 4 > .globl umul_512 > .type umul_512, @function > umul_512: > .LFB1: > .cfi_startproc > vpunpcklbw %ymm0, %ymm0, %ymm3 > vpunpcklbw %ymm1, %ymm1, %ymm2 > vpunpckhbw %ymm0, %ymm0, %ymm0 > vpunpckhbw %ymm1, %ymm1, %ymm1 > vpmullw %ymm3, %ymm2, %ymm2 > vpmullw %ymm0, %ymm1, %ymm1 > vpshufb .LC0(%rip), %ymm2, %ymm0 > vpshufb .LC1(%rip), %ymm1, %ymm1 > vpor %ymm1, %ymm0, %ymm0 > ret > .cfi_endproc > .LFE1: > .size umul_512, .-umul_512 This is on purpose: the check quoted below refuses to generate zmm instructions when a 128/256-bit vector width is preferred. Maybe I'll add -mprefer-vector-width=512 to the testcase. ---- 19498 /* Not generate zmm instruction when prefer 128/256 bit vector width. */ 19499 if (qimode == V32QImode 19500 && (TARGET_PREFER_AVX128 || TARGET_PREFER_AVX256)) 19501 return false; ----