https://gcc.gnu.org/bugzilla/show_bug.cgi?id=68923
H.J. Lu <hjl.tools at gmail dot com> changed:
What |Removed |Added
----------------------------------------------------------------------------
Status|UNCONFIRMED |RESOLVED
Resolution|--- |FIXED
Target Milestone|--- |9.0
--- Comment #3 from H.J. Lu <hjl.tools at gmail dot com> ---
Fixed for GCC 9:
[hjl@gnu-cfl-1 gcc]$ cat x.c
#include <immintrin.h>
#include <stdint.h>
#define USE_MOVQ
__m256 load_bytes_to_m256(uint8_t *p)
{
#ifdef USE_MOVQ // compiles to an actual movq then pmovzx xmm,xmm with gcc
-O3
__m128i small_load = _mm_cvtsi64_si128( *(uint64_t*)p );
#else // loadu compiles to a 128b load with gcc -O0, potentially segfaulting
__m128i small_load = _mm_loadu_si128( (__m128i*)p );
#endif
__m256i intvec = _mm256_cvtepu8_epi32( small_load );
return _mm256_cvtepi32_ps(intvec);
}
[hjl@gnu-cfl-1 gcc]$ ./xgcc -B./ -S -O3 x.c -march=haswell
[hjl@gnu-cfl-1 gcc]$ cat x.s
.file "x.c"
.text
.p2align 4
.globl load_bytes_to_m256
.type load_bytes_to_m256, @function
load_bytes_to_m256:
.LFB5186:
.cfi_startproc
vpmovzxbd (%rdi), %ymm0
vcvtdq2ps %ymm0, %ymm0
ret
.cfi_endproc
.LFE5186:
.size load_bytes_to_m256, .-load_bytes_to_m256
.ident "GCC: (GNU) 9.0.0 20190118 (experimental)"
.section .note.GNU-stack,"",@progbits
[hjl@gnu-cfl-1 gcc]$