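Hi! The vinsert[if]64x2 instructions require both AVX512VL and AVX512DQ, so if only AVX512VL is enabled, we should emit the 32x4 variants instead, which do the same thing when no masking is involved. With masking, the two forms differ (the write mask is applied per 32-bit vs. per 64-bit element), so for the masked patterns we have to require TARGET_AVX512DQ.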
Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? 2016-05-18 Jakub Jelinek <ja...@redhat.com> * config/i386/sse.md (vec_set_lo_<mode><mask_name>, vec_set_hi_<mode><mask_name>): Add && <mask_avx512dq_condition> condition. For !TARGET_AVX512DQ, emit 32x4 instruction instead of 64x2. * gcc.target/i386/avx512dq-vinsert-1.c: New test. * gcc.target/i386/avx512vl-vinsert-1.c: New test. --- gcc/config/i386/sse.md.jj 2016-05-18 15:02:54.000000000 +0200 +++ gcc/config/i386/sse.md 2016-05-18 15:54:20.944236472 +0200 @@ -17823,10 +17823,12 @@ (define_insn "vec_set_lo_<mode><mask_nam (vec_select:<ssehalfvecmode> (match_operand:VI8F_256 1 "register_operand" "v") (parallel [(const_int 2) (const_int 3)]))))] - "TARGET_AVX" + "TARGET_AVX && <mask_avx512dq_condition>" { - if (TARGET_AVX512VL) + if (TARGET_AVX512DQ) return "vinsert<shuffletype>64x2\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}"; + else if (TARGET_AVX512VL) + return "vinsert<shuffletype>32x4\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}"; else return "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"; } @@ -17843,10 +17845,12 @@ (define_insn "vec_set_hi_<mode><mask_nam (match_operand:VI8F_256 1 "register_operand" "v") (parallel [(const_int 0) (const_int 1)])) (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))] - "TARGET_AVX" + "TARGET_AVX && <mask_avx512dq_condition>" { - if (TARGET_AVX512VL) + if (TARGET_AVX512DQ) return "vinsert<shuffletype>64x2\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}"; + else if (TARGET_AVX512VL) + return "vinsert<shuffletype>32x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}"; else return "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"; } --- gcc/testsuite/gcc.target/i386/avx512dq-vinsert-1.c.jj 2016-05-18 16:08:48.572351388 +0200 +++ gcc/testsuite/gcc.target/i386/avx512dq-vinsert-1.c 2016-05-18 16:09:18.114947627 +0200 @@ -0,0 +1,100 @@ +/* { dg-do compile { target { ! 
ia32 } } } */ +/* { dg-options "-O2 -mavx512vl -mavx512dq -masm=att" } */ + +typedef int V1 __attribute__((vector_size (32))); +typedef long long V2 __attribute__((vector_size (32))); +typedef float V3 __attribute__((vector_size (32))); +typedef double V4 __attribute__((vector_size (32))); + +void +f1 (V1 x, int y) +{ + register V1 a __asm ("xmm16"); + a = x; + asm volatile ("" : "+v" (a)); + a[3] = y; + asm volatile ("" : "+v" (a)); +} + +void +f2 (V1 x, int y) +{ + register V1 a __asm ("xmm16"); + a = x; + asm volatile ("" : "+v" (a)); + a[6] = y; + asm volatile ("" : "+v" (a)); +} + +void +f3 (V2 x, long long y) +{ + register V2 a __asm ("xmm16"); + a = x; + asm volatile ("" : "+v" (a)); + a[1] = y; + asm volatile ("" : "+v" (a)); +} + +void +f4 (V2 x, long long y) +{ + register V2 a __asm ("xmm16"); + a = x; + asm volatile ("" : "+v" (a)); + a[3] = y; + asm volatile ("" : "+v" (a)); +} + +void +f5 (V3 x, float y) +{ + register V3 a __asm ("xmm16"); + a = x; + asm volatile ("" : "+v" (a)); + a[3] = y; + asm volatile ("" : "+v" (a)); +} + +void +f6 (V3 x, float y) +{ + register V3 a __asm ("xmm16"); + a = x; + asm volatile ("" : "+v" (a)); + a[6] = y; + asm volatile ("" : "+v" (a)); +} + +void +f7 (V4 x, double y) +{ + register V4 a __asm ("xmm16"); + a = x; + asm volatile ("" : "+v" (a)); + a[1] = y; + asm volatile ("" : "+v" (a)); +} + +void +f8 (V4 x, double y) +{ + register V4 a __asm ("xmm16"); + a = x; + asm volatile ("" : "+v" (a)); + a[3] = y; + asm volatile ("" : "+v" (a)); +} + +/* { dg-final { scan-assembler-times "vinserti32x4\[^\n\r]*0x0\[^\n\r]*%ymm16" 1 } } */ +/* { dg-final { scan-assembler-times "vinserti32x4\[^\n\r]*0x1\[^\n\r]*%ymm16" 1 } } */ +/* { dg-final { scan-assembler-times "vinsertf32x4\[^\n\r]*0x0\[^\n\r]*%ymm16" 1 } } */ +/* { dg-final { scan-assembler-times "vinsertf32x4\[^\n\r]*0x1\[^\n\r]*%ymm16" 1 } } */ +/* { dg-final { scan-assembler-times "vextracti32x4\[^\n\r]*0x1\[^\n\r]*%\[yz]mm16" 1 } } */ +/* { dg-final { 
scan-assembler-times "vextractf32x4\[^\n\r]*0x1\[^\n\r]*%\[yz]mm16" 1 } } */ +/* { dg-final { scan-assembler-times "vinserti64x2\[^\n\r]*0x0\[^\n\r]*%ymm16" 1 } } */ +/* { dg-final { scan-assembler-times "vinserti64x2\[^\n\r]*0x1\[^\n\r]*%ymm16" 1 } } */ +/* { dg-final { scan-assembler-times "vinsertf64x2\[^\n\r]*0x0\[^\n\r]*%ymm16" 1 } } */ +/* { dg-final { scan-assembler-times "vinsertf64x2\[^\n\r]*0x1\[^\n\r]*%ymm16" 1 } } */ +/* { dg-final { scan-assembler-times "vextracti64x2\[^\n\r]*0x1\[^\n\r]*%\[yz]mm16" 1 } } */ +/* { dg-final { scan-assembler-times "vextractf64x2\[^\n\r]*0x1\[^\n\r]*%\[yz]mm16" 1 } } */ --- gcc/testsuite/gcc.target/i386/avx512vl-vinsert-1.c.jj 2016-05-18 16:07:03.928781560 +0200 +++ gcc/testsuite/gcc.target/i386/avx512vl-vinsert-1.c 2016-05-18 16:08:29.500612043 +0200 @@ -0,0 +1,98 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-O2 -mavx512vl -mno-avx512dq -masm=att" } */ + +typedef int V1 __attribute__((vector_size (32))); +typedef long long V2 __attribute__((vector_size (32))); +typedef float V3 __attribute__((vector_size (32))); +typedef double V4 __attribute__((vector_size (32))); + +void +f1 (V1 x, int y) +{ + register V1 a __asm ("xmm16"); + a = x; + asm volatile ("" : "+v" (a)); + a[3] = y; + asm volatile ("" : "+v" (a)); +} + +void +f2 (V1 x, int y) +{ + register V1 a __asm ("xmm16"); + a = x; + asm volatile ("" : "+v" (a)); + a[6] = y; + asm volatile ("" : "+v" (a)); +} + +void +f3 (V2 x, long long y) +{ + register V2 a __asm ("xmm16"); + a = x; + asm volatile ("" : "+v" (a)); + a[1] = y; + asm volatile ("" : "+v" (a)); +} + +void +f4 (V2 x, long long y) +{ + register V2 a __asm ("xmm16"); + a = x; + asm volatile ("" : "+v" (a)); + a[3] = y; + asm volatile ("" : "+v" (a)); +} + +void +f5 (V3 x, float y) +{ + register V3 a __asm ("xmm16"); + a = x; + asm volatile ("" : "+v" (a)); + a[3] = y; + asm volatile ("" : "+v" (a)); +} + +void +f6 (V3 x, float y) +{ + register V3 a __asm ("xmm16"); + a = x; + asm 
volatile ("" : "+v" (a)); + a[6] = y; + asm volatile ("" : "+v" (a)); +} + +void +f7 (V4 x, double y) +{ + register V4 a __asm ("xmm16"); + a = x; + asm volatile ("" : "+v" (a)); + a[1] = y; + asm volatile ("" : "+v" (a)); +} + +void +f8 (V4 x, double y) +{ + register V4 a __asm ("xmm16"); + a = x; + asm volatile ("" : "+v" (a)); + a[3] = y; + asm volatile ("" : "+v" (a)); +} + +/* { dg-final { scan-assembler-times "vinserti32x4\[^\n\r]*0x0\[^\n\r]*%ymm16" 2 } } */ +/* { dg-final { scan-assembler-times "vinserti32x4\[^\n\r]*0x1\[^\n\r]*%ymm16" 2 } } */ +/* { dg-final { scan-assembler-times "vinsertf32x4\[^\n\r]*0x0\[^\n\r]*%ymm16" 2 } } */ +/* { dg-final { scan-assembler-times "vinsertf32x4\[^\n\r]*0x1\[^\n\r]*%ymm16" 2 } } */ +/* { dg-final { scan-assembler-times "vextracti32x4\[^\n\r]*0x1\[^\n\r]*%\[yz]mm16" 2 } } */ +/* { dg-final { scan-assembler-times "vextractf32x4\[^\n\r]*0x1\[^\n\r]*%\[yz]mm16" 2 } } */ +/* { dg-final { scan-assembler-not "vinserti64x2" } } */ +/* { dg-final { scan-assembler-not "vinsertf64x2" } } */ +/* { dg-final { scan-assembler-not "vextracti64x2" } } */ +/* { dg-final { scan-assembler-not "vextractf64x2" } } */ Jakub