https://gcc.gnu.org/bugzilla/show_bug.cgi?id=63351

H.J. Lu <hjl.tools at gmail dot com> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
             Status|NEW                         |RESOLVED
         Resolution|---                         |INVALID

--- Comment #6 from H.J. Lu <hjl.tools at gmail dot com> ---
AVX512 scalar broadcast only works on a memory operand.  GCC 9 now generates:

[hjl@gnu-efi-2 gcc]$ cat x.c
#include <immintrin.h>

void dummyx(__m128 a, __m128 b);

void broadcastx(__m128 a, float *b) {
    __m128 bb = _mm_set1_ps(*b);
    __m128 ab = _mm_add_ps(a,bb);
    __m128 cc = _mm_set1_ps(5.0);
    __m128 ac = _mm_add_ps(a,cc);
    dummyx(ab, ac);
}
[hjl@gnu-efi-2 gcc]$ cat y.c
#include <immintrin.h>

void dummyz(__m512i a, __m512i b);

void broadcastz(__m512i a, int *b) {
    __m512i bb = _mm512_set1_epi32(*b);
    __m512i ab = _mm512_add_epi32(a,bb);
    __m512i cc = _mm512_set1_epi32(5);
    __m512i ac = _mm512_add_epi32(a,cc);
    dummyz(ab, ac);
}
[hjl@gnu-efi-2 gcc]$ ./xgcc -B./ -S -Ofast -mavx512vl x.c
[hjl@gnu-efi-2 gcc]$ ./xgcc -B./ -S -Ofast -mavx512f y.c
[hjl@gnu-efi-2 gcc]$ cat x.s 
        .file   "x.c"
        .text
        .p2align 4
        .globl  broadcastx
        .type   broadcastx, @function
broadcastx:
.LFB5186:
        .cfi_startproc
        vmovaps %xmm0, %xmm1
        vaddps  (%rdi){1to4}, %xmm0, %xmm0
        vaddps  .LC0(%rip), %xmm1, %xmm1
        jmp     dummyx
        .cfi_endproc
.LFE5186:
        .size   broadcastx, .-broadcastx
        .section        .rodata.cst16,"aM",@progbits,16
        .align 16
.LC0:
        .long   1084227584
        .long   1084227584
        .long   1084227584
        .long   1084227584
        .ident  "GCC: (GNU) 9.0.0 20181022 (experimental)"
        .section        .note.GNU-stack,"",@progbits
[hjl@gnu-efi-2 gcc]$ cat y.s 
        .file   "y.c"
        .text
        .p2align 4
        .globl  broadcastz
        .type   broadcastz, @function
broadcastz:
.LFB5186:
        .cfi_startproc
        movl    $5, %eax
        vmovdqa64       %zmm0, %zmm2
        vpaddd  (%rdi){1to16}, %zmm0, %zmm0
        vpbroadcastd    %eax, %zmm1
        vpaddd  %zmm2, %zmm1, %zmm1
        jmp     dummyz
        .cfi_endproc
.LFE5186:
        .size   broadcastz, .-broadcastz
        .ident  "GCC: (GNU) 9.0.0 20181022 (experimental)"
        .section        .note.GNU-stack,"",@progbits
[hjl@gnu-efi-2 gcc]$ 

With the PR 87537 fix, I got:

[hjl@gnu-efi-2 gcc]$ cat x.s
        .file   "x.c"
        .text
        .p2align 4
        .globl  broadcastx
        .type   broadcastx, @function
broadcastx:
.LFB5186:
        .cfi_startproc
        vmovaps %xmm0, %xmm1
        vaddps  (%rdi){1to4}, %xmm0, %xmm0
        vaddps  .LC1(%rip){1to4}, %xmm1, %xmm1
        jmp     dummyx
        .cfi_endproc
.LFE5186:
        .size   broadcastx, .-broadcastx
        .section        .rodata.cst4,"aM",@progbits,4
        .align 4
.LC1:
        .long   1084227584
        .ident  "GCC: (GNU) 9.0.0 20181022 (experimental)"
        .section        .note.GNU-stack,"",@progbits
[hjl@gnu-efi-2 gcc]$

Reply via email to