Hello, The patch at the bottom fixes the spec2k6/410.bwaves compilation failure on gcc-5-branch. AVX-512F broadcasts to ymm are not allowed, so we need to use zmm as the destination instead.
I'll commit it to gcc-5 after bootstrap & regtest. I'll also check if it is needed for upcoming v6 gcc/ * config/i386/sse.md (define_insn "vec_dup<mode>"): Separate EVEX alternative. gcc/testsuite/ * gcc.target/i386/avx512f-vbroadcastsd-3.c: New test. -- Thanks, K commit b5cc7717310e04b37fc55602d83c16b35b41e875 Author: Kirill Yukhin <kirill.yuk...@intel.com> Date: Tue Dec 29 17:02:21 2015 +0300 Fix 410.bwaves. diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 1a87952..316977b8 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -16930,20 +16930,21 @@ (set_attr "mode" "<sseinsnmode>")]) (define_insn "vec_dup<mode>" - [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "=x,x,v,x") + [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "=x,x,x,v,x") (vec_duplicate:AVX_VEC_DUP_MODE - (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,m,v,?x")))] + (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,m,x,v,?x")))] "TARGET_AVX" "@ v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0|%0, %1} vbroadcast<ssescalarmodesuffix>\t{%1, %0|%0, %1} v<sseintprefix>broadcast<bcstscalarsuff>\t{%x1, %0|%0, %x1} + v<sseintprefix>broadcast<bcstscalarsuff>\t{%x1, %g0|%g0, %x1} #" [(set_attr "type" "ssemov") (set_attr "prefix_extra" "1") (set_attr "prefix" "maybe_evex") - (set_attr "isa" "avx2,noavx2,avx2,noavx2") - (set_attr "mode" "<sseinsnmode>,V8SF,<sseinsnmode>,V8SF")]) + (set_attr "isa" "avx2,noavx2,avx2,avx512f,noavx2") + (set_attr "mode" "<sseinsnmode>,V8SF,<sseinsnmode>,<sseinsnmode>,V8SF")]) (define_split [(set (match_operand:AVX2_VEC_DUP_MODE 0 "register_operand") diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vbroadcastsd-3.c b/gcc/testsuite/gcc.target/i386/avx512f-vbroadcastsd-3.c new file mode 100644 index 0000000..9e44513 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512f-vbroadcastsd-3.c @@ -0,0 +1,21 @@ +/* { dg-do compile { target { ! 
ia32 } } } */ +/* { dg-options "-mavx512f -O2 -ftree-vectorize -ffixed-xmm0 -ffixed-xmm1 -ffixed-xmm2 -ffixed-xmm3 -ffixed-xmm4 -ffixed-xmm5 -ffixed-xmm6 -ffixed-xmm7 -ffixed-xmm8 -ffixed-xmm9 -ffixed-xmm10 -ffixed-xmm11 -ffixed-xmm12 -ffixed-xmm13 -ffixed-xmm14" } */ +/* { dg-final { scan-assembler-times "vbroadcastsd\[ \\t\]+(?:%xmm(?:\[0-9\]|1\[0-5\]),\[ \\t\]*%ymm(?:\[0-9\]|1\[0-5\])|%xmm\[0-9\]+,\[ \\t\]*%zmm)" 1 } } */ + +#include <immintrin.h> + +register __m512d z asm ("zmm16"); /* { dg-warning "call-clobbered register used for global register variable" } */ + +double a[10000]; + +void foo (unsigned N) +{ + double d; + _mm_store_sd(&d, _mm256_extractf128_pd (_mm512_extractf64x4_pd (z, 0), 0)); + + for (int i=0; i<N; i++) + { + a[i] = d; + a[i] += a[i-4]; + } +}