https://gcc.gnu.org/bugzilla/show_bug.cgi?id=63351
H.J. Lu <hjl.tools at gmail dot com> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
             Status|NEW                         |RESOLVED
         Resolution|---                         |INVALID

--- Comment #6 from H.J. Lu <hjl.tools at gmail dot com> ---
AVX512 scalar broadcast only works on a memory operand. GCC 9 now generates:

[hjl@gnu-efi-2 gcc]$ cat x.c
#include <immintrin.h>

void dummyx(__m128 a, __m128 b);

void broadcastx(__m128 a, float *b)
{
  __m128 bb = _mm_set1_ps(*b);
  __m128 ab = _mm_add_ps(a,bb);
  __m128 cc = _mm_set1_ps(5.0);
  __m128 ac = _mm_add_ps(a,cc);
  dummyx(ab, ac);
}
[hjl@gnu-efi-2 gcc]$ cat y.c
#include <immintrin.h>

void dummyz(__m512i a, __m512i b);

void broadcastz(__m512i a, int *b)
{
  __m512i bb = _mm512_set1_epi32(*b);
  __m512i ab = _mm512_add_epi32(a,bb);
  __m512i cc = _mm512_set1_epi32(5);
  __m512i ac = _mm512_add_epi32(a,cc);
  dummyz(ab, ac);
}
[hjl@gnu-efi-2 gcc]$ ./xgcc -B./ -S -Ofast -mavx512vl x.c
[hjl@gnu-efi-2 gcc]$ ./xgcc -B./ -S -Ofast -mavx512f y.c
[hjl@gnu-efi-2 gcc]$ cat x.s
        .file "x.c"
        .text
        .p2align 4
        .globl broadcastx
        .type broadcastx, @function
broadcastx:
.LFB5186:
        .cfi_startproc
        vmovaps %xmm0, %xmm1
        vaddps (%rdi){1to4}, %xmm0, %xmm0
        vaddps .LC0(%rip), %xmm1, %xmm1
        jmp dummyx
        .cfi_endproc
.LFE5186:
        .size broadcastx, .-broadcastx
        .section .rodata.cst16,"aM",@progbits,16
        .align 16
.LC0:
        .long 1084227584
        .long 1084227584
        .long 1084227584
        .long 1084227584
        .ident "GCC: (GNU) 9.0.0 20181022 (experimental)"
        .section .note.GNU-stack,"",@progbits
[hjl@gnu-efi-2 gcc]$ cat y.s
        .file "y.c"
        .text
        .p2align 4
        .globl broadcastz
        .type broadcastz, @function
broadcastz:
.LFB5186:
        .cfi_startproc
        movl $5, %eax
        vmovdqa64 %zmm0, %zmm2
        vpaddd (%rdi){1to16}, %zmm0, %zmm0
        vpbroadcastd %eax, %zmm1
        vpaddd %zmm2, %zmm1, %zmm1
        jmp dummyz
        .cfi_endproc
.LFE5186:
        .size broadcastz, .-broadcastz
        .ident "GCC: (GNU) 9.0.0 20181022 (experimental)"
        .section .note.GNU-stack,"",@progbits
[hjl@gnu-efi-2 gcc]$

With the PR 87537 fix, I got:

[hjl@gnu-efi-2 gcc]$ cat x.s
        .file "x.c"
        .text
        .p2align 4
        .globl broadcastx
        .type broadcastx, @function
broadcastx:
.LFB5186:
        .cfi_startproc
        vmovaps %xmm0, %xmm1
        vaddps (%rdi){1to4}, %xmm0, %xmm0
        vaddps .LC1(%rip){1to4}, %xmm1, %xmm1
        jmp dummyx
        .cfi_endproc
.LFE5186:
        .size broadcastx, .-broadcastx
        .section .rodata.cst4,"aM",@progbits,4
        .align 4
.LC1:
        .long 1084227584
        .ident "GCC: (GNU) 9.0.0 20181022 (experimental)"
        .section .note.GNU-stack,"",@progbits
[hjl@gnu-efi-2 gcc]$