Hi! This patch fixes a bunch of recent regressions: FAIL: gcc.target/i386/avx-1.c (internal compiler error) FAIL: gcc.target/i386/avx-1.c (test for excess errors) FAIL: gcc.target/i386/avx-2.c (internal compiler error) FAIL: gcc.target/i386/avx-2.c (test for excess errors) FAIL: gcc.target/i386/avx512f-vec-init.c (internal compiler error) FAIL: gcc.target/i386/avx512f-vec-init.c (test for excess errors) UNRESOLVED: gcc.target/i386/avx512f-vec-init.c scan-assembler-times vbroadcastsd 1 UNRESOLVED: gcc.target/i386/avx512f-vec-init.c scan-assembler-times vbroadcastss 1 UNRESOLVED: gcc.target/i386/avx512f-vec-init.c scan-assembler-times vmovdqa64[ \\\\t]+%zmm 2 UNRESOLVED: gcc.target/i386/avx512f-vec-init.c scan-assembler-times vpbroadcastb 2 UNRESOLVED: gcc.target/i386/avx512f-vec-init.c scan-assembler-times vpbroadcastd 1 UNRESOLVED: gcc.target/i386/avx512f-vec-init.c scan-assembler-times vpbroadcastq 1 UNRESOLVED: gcc.target/i386/avx512f-vec-init.c scan-assembler-times vpbroadcastw 2 FAIL: gcc.target/i386/sse-14.c (internal compiler error) FAIL: gcc.target/i386/sse-14.c (test for excess errors) FAIL: gcc.target/i386/sse-22.c (internal compiler error) FAIL: gcc.target/i386/sse-22.c (test for excess errors) FAIL: gcc.target/i386/sse-22a.c (internal compiler error) FAIL: gcc.target/i386/sse-22a.c (test for excess errors) FAIL: gcc.target/i386/sse-23.c (internal compiler error) FAIL: gcc.target/i386/sse-23.c (test for excess errors) FAIL: gcc.target/i386/sse-24.c (internal compiler error) FAIL: gcc.target/i386/sse-24.c (test for excess errors) and improves the quality of the code generated for AVX2 and AVX512F broadcasts. As AVX2 broadcast instructions can take their source from memory or from a vector register (but only AVX512F can take it from a GPR), the patch adds a splitter for the GPR case and marks that alternative with the ! constraint modifier, so that the RA can choose what is best; if a broadcast from a GPR is desirable, the split first performs a vmovd from the GPR into the dest register and then applies vpbroadcast{b,w,d} to it.
IMHO the AVX512* patterns should be merged, so that the GPR and MEM sources are just alternatives of the same define_insn rather than different define_insns, but I am not changing that right now; I will leave that to Kirill as a follow-up. Bootstrapped/regtested on x86_64-linux and i686-linux, OK for trunk? 2014-10-21 Jakub Jelinek <ja...@redhat.com> PR target/63594 * config/i386/i386.c (ix86_expand_vector_init_duplicate): For V{8HI,16QI,16HI,32QI}mode call ix86_vector_duplicate_value even for just TARGET_AVX2, not only for TARGET_AVX512VL && TARGET_AVX512BW. For V{32HI,64QI}mode, call ix86_vector_duplicate_value only if TARGET_AVX512BW, otherwise build it using concatenation of 256-bit broadcast. * config/i386/sse.md (AVX_VEC_DUP_MODE): Moved after avx512 broadcast patterns. (vec_dup<mode>): Likewise. For avx2 use v<sseintprefix>broadcast<bcstscalarsuff> instead of vbroadcast<ssescalarmodesuffix>. (AVX2_VEC_DUP_MODE): New mode iterator. (*vec_dup<mode>): New TARGET_AVX2 define_insn with AVX2_VEC_DUP_MODE iterator, add a splitter for that. * gcc.dg/pr63594-1.c: New test. * gcc.dg/pr63594-2.c: New test. * gcc.target/i386/sse2-pr63594-1.c: New test. * gcc.target/i386/sse2-pr63594-2.c: New test. * gcc.target/i386/avx-pr63594-1.c: New test. * gcc.target/i386/avx-pr63594-2.c: New test. * gcc.target/i386/avx2-pr63594-1.c: New test. * gcc.target/i386/avx2-pr63594-2.c: New test. * gcc.target/i386/avx512f-pr63594-1.c: New test. * gcc.target/i386/avx512f-pr63594-2.c: New test. * gcc.target/i386/avx512f-vec-init.c: Adjust expected insn counts. 
--- gcc/config/i386/i386.c.jj 2014-10-21 13:59:39.102650495 +0200 +++ gcc/config/i386/i386.c 2014-10-21 14:35:54.941980175 +0200 @@ -39855,8 +39855,6 @@ ix86_expand_vector_init_duplicate (bool case V8SFmode: case V8SImode: case V2DFmode: - case V64QImode: - case V32HImode: case V2DImode: case V4SFmode: case V4SImode: @@ -39887,8 +39885,8 @@ ix86_expand_vector_init_duplicate (bool goto widen; case V8HImode: - if (TARGET_AVX512VL && TARGET_AVX512BW) - return ix86_vector_duplicate_value (mode, target, val); + if (TARGET_AVX2) + return ix86_vector_duplicate_value (mode, target, val); if (TARGET_SSE2) { @@ -39920,8 +39918,8 @@ ix86_expand_vector_init_duplicate (bool goto widen; case V16QImode: - if (TARGET_AVX512VL && TARGET_AVX512BW) - return ix86_vector_duplicate_value (mode, target, val); + if (TARGET_AVX2) + return ix86_vector_duplicate_value (mode, target, val); if (TARGET_SSE2) goto permute; @@ -39952,14 +39950,31 @@ ix86_expand_vector_init_duplicate (bool case V16HImode: case V32QImode: - if (TARGET_AVX512VL && TARGET_AVX512BW) - return ix86_vector_duplicate_value (mode, target, val); + if (TARGET_AVX2) + return ix86_vector_duplicate_value (mode, target, val); else { enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode); rtx x = gen_reg_rtx (hvmode); ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val); + gcc_assert (ok); + + x = gen_rtx_VEC_CONCAT (mode, x, x); + emit_insn (gen_rtx_SET (VOIDmode, target, x)); + } + return true; + + case V64QImode: + case V32HImode: + if (TARGET_AVX512BW) + return ix86_vector_duplicate_value (mode, target, val); + else + { + enum machine_mode hvmode = (mode == V32HImode ? 
V16HImode : V32QImode); + rtx x = gen_reg_rtx (hvmode); + + ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val); gcc_assert (ok); x = gen_rtx_VEC_CONCAT (mode, x, x); --- gcc/config/i386/sse.md.jj 2014-10-21 11:51:30.976626802 +0200 +++ gcc/config/i386/sse.md 2014-10-21 14:38:20.690228844 +0200 @@ -16523,25 +16523,6 @@ (define_insn "avx2_vec_dupv4df" (set_attr "prefix" "vex") (set_attr "mode" "V4DF")]) -;; Modes handled by AVX vec_dup patterns. -(define_mode_iterator AVX_VEC_DUP_MODE - [V8SI V8SF V4DI V4DF]) - -(define_insn "vec_dup<mode>" - [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "=x,v,x") - (vec_duplicate:AVX_VEC_DUP_MODE - (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,v,?x")))] - "TARGET_AVX" - "@ - vbroadcast<ssescalarmodesuffix>\t{%1, %0|%0, %1} - vbroadcast<ssescalarmodesuffix>\t{%x1, %0|%0, %x1} - #" - [(set_attr "type" "ssemov") - (set_attr "prefix_extra" "1") - (set_attr "prefix" "maybe_evex") - (set_attr "isa" "*,avx2,noavx2") - (set_attr "mode" "V8SF")]) - (define_insn "<avx512>_vec_dup<mode><mask_name>" [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v") (vec_duplicate:V48_AVX512VL @@ -16644,6 +16625,59 @@ (define_insn "avx2_vbroadcasti128_<mode> (set_attr "prefix" "vex") (set_attr "mode" "OI")]) +;; Modes handled by AVX vec_dup patterns. +(define_mode_iterator AVX_VEC_DUP_MODE + [V8SI V8SF V4DI V4DF]) +;; Modes handled by AVX2 vec_dup patterns. 
+(define_mode_iterator AVX2_VEC_DUP_MODE + [V32QI V16QI V16HI V8HI V8SI V4SI]) + +(define_insn "*vec_dup<mode>" + [(set (match_operand:AVX2_VEC_DUP_MODE 0 "register_operand" "=x,x,x") + (vec_duplicate:AVX2_VEC_DUP_MODE + (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,x,!r")))] + "TARGET_AVX2" + "@ + v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0|%0, %1} + v<sseintprefix>broadcast<bcstscalarsuff>\t{%x1, %0|%0, %x1} + #" + [(set_attr "type" "ssemov") + (set_attr "prefix_extra" "1") + (set_attr "prefix" "maybe_evex") + (set_attr "mode" "<sseinsnmode>")]) + +(define_insn "vec_dup<mode>" + [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "=x,x,v,x") + (vec_duplicate:AVX_VEC_DUP_MODE + (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,m,v,?x")))] + "TARGET_AVX" + "@ + v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0|%0, %1} + vbroadcast<ssescalarmodesuffix>\t{%1, %0|%0, %1} + v<sseintprefix>broadcast<bcstscalarsuff>\t{%x1, %0|%0, %x1} + #" + [(set_attr "type" "ssemov") + (set_attr "prefix_extra" "1") + (set_attr "prefix" "maybe_evex") + (set_attr "isa" "avx2,noavx2,avx2,noavx2") + (set_attr "mode" "<sseinsnmode>,V8SF,<sseinsnmode>,V8SF")]) + +(define_split + [(set (match_operand:AVX2_VEC_DUP_MODE 0 "register_operand") + (vec_duplicate:AVX2_VEC_DUP_MODE + (match_operand:<ssescalarmode> 1 "register_operand")))] + "TARGET_AVX2 && reload_completed && GENERAL_REG_P (operands[1])" + [(const_int 0)] +{ + emit_insn (gen_vec_setv4si_0 (gen_lowpart (V4SImode, operands[0]), + CONST0_RTX (V4SImode), + gen_lowpart (SImode, operands[1]))); + emit_insn (gen_avx2_pbroadcast<mode> (operands[0], + gen_lowpart (<ssexmmmode>mode, + operands[0]))); + DONE; +}) + (define_split [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand") (vec_duplicate:AVX_VEC_DUP_MODE --- gcc/testsuite/gcc.dg/pr63594-1.c.jj 2014-10-21 14:49:41.756393903 +0200 +++ gcc/testsuite/gcc.dg/pr63594-1.c 2014-10-21 15:35:16.556274687 +0200 @@ -0,0 +1,65 @@ +/* PR target/63594 */ +/* { 
dg-do compile } */ +/* { dg-options "-O2 -Wno-psabi" } */ +/* { dg-additional-options "-mno-mmx" { target i?86-*-linux* x86_64-*-linux* } } */ + +#define C1 c +#define C2 C1, C1 +#define C4 C2, C2 +#define C8 C4, C4 +#define C16 C8, C8 +#define C32 C16, C16 +#define C64 C32, C32 +#define C_(n) n +#define C(n) C_(C##n) + +#define T(t,s) \ +typedef t v##t##s __attribute__ ((__vector_size__ (s * sizeof (t)))); \ +v##t##s \ +test1##t##s (t c) \ +{ \ + v##t##s v = { C(s) }; \ + return v; \ +} \ + \ +v##t##s \ +test2##t##s (t *p) \ +{ \ + t c = *p; \ + v##t##s v = { C(s) }; \ + return v; \ +} + +typedef long long llong; + +T(char, 64) +T(char, 32) +T(char, 16) +T(char, 8) +T(char, 4) +T(char, 2) +T(char, 1) +T(short, 32) +T(short, 16) +T(short, 8) +T(short, 4) +T(short, 2) +T(short, 1) +T(int, 16) +T(int, 8) +T(int, 4) +T(int, 2) +T(int, 1) +T(float, 16) +T(float, 8) +T(float, 4) +T(float, 2) +T(float, 1) +T(llong, 8) +T(llong, 4) +T(llong, 2) +T(llong, 1) +T(double, 8) +T(double, 4) +T(double, 2) +T(double, 1) --- gcc/testsuite/gcc.dg/pr63594-2.c.jj 2014-10-21 14:51:30.562343449 +0200 +++ gcc/testsuite/gcc.dg/pr63594-2.c 2014-10-21 15:36:31.532843201 +0200 @@ -0,0 +1,92 @@ +/* PR target/63594 */ +/* { dg-do run } */ +/* { dg-options "-O2 -Wno-psabi" } */ +/* { dg-additional-options "-mno-mmx" { target i?86-*-linux* x86_64-*-linux* } } */ + +#define C1 c +#define C2 C1, C1 +#define C4 C2, C2 +#define C8 C4, C4 +#define C16 C8, C8 +#define C32 C16, C16 +#define C64 C32, C32 +#define C_(n) n +#define C(n) C_(C##n) + +#define T(t,s) \ +typedef t v##t##s __attribute__ ((__vector_size__ (s * sizeof (t)))); \ +__attribute__((noinline, noclone)) v##t##s \ +test1##t##s (t c) \ +{ \ + v##t##s v = { C(s) }; \ + return v; \ +} \ + \ +__attribute__((noinline, noclone)) v##t##s \ +test2##t##s (t *p) \ +{ \ + t c = *p; \ + v##t##s v = { C(s) }; \ + return v; \ +} \ + \ +void \ +test3##t##s (void) \ +{ \ + t c = 17; \ + int i; \ + v##t##s a = test1##t##s (c); \ + for (i = 0; i < s; 
i++) \ + if (a[i] != 17) \ + __builtin_abort (); \ + v##t##s b = test2##t##s (&c); \ + for (i = 0; i < s; i++) \ + if (b[i] != 17) \ + __builtin_abort (); \ +} + +typedef long long llong; + +#define TESTS \ +T(char, 64) \ +T(char, 32) \ +T(char, 16) \ +T(char, 8) \ +T(char, 4) \ +T(char, 2) \ +T(char, 1) \ +T(short, 32) \ +T(short, 16) \ +T(short, 8) \ +T(short, 4) \ +T(short, 2) \ +T(short, 1) \ +T(int, 16) \ +T(int, 8) \ +T(int, 4) \ +T(int, 2) \ +T(int, 1) \ +T(float, 16) \ +T(float, 8) \ +T(float, 4) \ +T(float, 2) \ +T(float, 1) \ +T(llong, 8) \ +T(llong, 4) \ +T(llong, 2) \ +T(llong, 1) \ +T(double, 8) \ +T(double, 4) \ +T(double, 2) \ +T(double, 1) + +TESTS + +int +main () +{ +#undef T +#define T(t,s) test3##t##s (); + TESTS + return 0; +} --- gcc/testsuite/gcc.target/i386/sse2-pr63594-1.c.jj 2014-10-21 15:41:08.081652929 +0200 +++ gcc/testsuite/gcc.target/i386/sse2-pr63594-1.c 2014-10-21 15:41:49.322893733 +0200 @@ -0,0 +1,5 @@ +/* PR target/63594 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -msse2 -mno-mmx -Wno-psabi" } */ + +#include "../../gcc.dg/pr63594-1.c" --- gcc/testsuite/gcc.target/i386/sse2-pr63594-2.c.jj 2014-10-21 15:40:13.361676458 +0200 +++ gcc/testsuite/gcc.target/i386/sse2-pr63594-2.c 2014-10-21 15:41:27.480287985 +0200 @@ -0,0 +1,18 @@ +/* PR target/63594 */ +/* { dg-do run { target sse2 } } */ +/* { dg-options "-O2 -msse2 -mno-mmx -Wno-psabi" } */ + +#include "sse2-check.h" + +int do_main (void); + +static void +sse2_test (void) +{ + do_main (); +} + +#undef main +#define main() do_main () + +#include "../../gcc.dg/pr63594-2.c" --- gcc/testsuite/gcc.target/i386/avx-pr63594-1.c.jj 2014-10-21 15:41:08.000000000 +0200 +++ gcc/testsuite/gcc.target/i386/avx-pr63594-1.c 2014-10-21 15:43:16.577240468 +0200 @@ -0,0 +1,5 @@ +/* PR target/63594 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx -mno-mmx -Wno-psabi" } */ + +#include "../../gcc.dg/pr63594-1.c" --- gcc/testsuite/gcc.target/i386/avx-pr63594-2.c.jj 2014-10-21 15:40:13.000000000 
+0200 +++ gcc/testsuite/gcc.target/i386/avx-pr63594-2.c 2014-10-21 15:43:25.527072754 +0200 @@ -0,0 +1,18 @@ +/* PR target/63594 */ +/* { dg-do run { target avx } } */ +/* { dg-options "-O2 -mavx -mno-mmx -Wno-psabi" } */ + +#include "avx-check.h" + +int do_main (void); + +static void +avx_test (void) +{ + do_main (); +} + +#undef main +#define main() do_main () + +#include "../../gcc.dg/pr63594-2.c" --- gcc/testsuite/gcc.target/i386/avx2-pr63594-1.c.jj 2014-10-21 15:41:08.000000000 +0200 +++ gcc/testsuite/gcc.target/i386/avx2-pr63594-1.c 2014-10-21 15:44:04.167347796 +0200 @@ -0,0 +1,5 @@ +/* PR target/63594 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx2 -mno-mmx -Wno-psabi" } */ + +#include "../../gcc.dg/pr63594-1.c" --- gcc/testsuite/gcc.target/i386/avx2-pr63594-2.c.jj 2014-10-21 15:40:13.000000000 +0200 +++ gcc/testsuite/gcc.target/i386/avx2-pr63594-2.c 2014-10-21 15:44:11.155210402 +0200 @@ -0,0 +1,18 @@ +/* PR target/63594 */ +/* { dg-do run { target avx2 } } */ +/* { dg-options "-O2 -mavx2 -mno-mmx -Wno-psabi" } */ + +#include "avx2-check.h" + +int do_main (void); + +static void +avx2_test (void) +{ + do_main (); +} + +#undef main +#define main() do_main () + +#include "../../gcc.dg/pr63594-2.c" --- gcc/testsuite/gcc.target/i386/avx512f-pr63594-1.c.jj 2014-10-21 15:41:08.000000000 +0200 +++ gcc/testsuite/gcc.target/i386/avx512f-pr63594-1.c 2014-10-21 15:45:26.997790887 +0200 @@ -0,0 +1,5 @@ +/* PR target/63594 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx512f -mno-mmx -Wno-psabi" } */ + +#include "../../gcc.dg/pr63594-1.c" --- gcc/testsuite/gcc.target/i386/avx512f-pr63594-2.c.jj 2014-10-21 15:40:13.000000000 +0200 +++ gcc/testsuite/gcc.target/i386/avx512f-pr63594-2.c 2014-10-21 15:45:45.048455116 +0200 @@ -0,0 +1,18 @@ +/* PR target/63594 */ +/* { dg-do run { target avx512f } } */ +/* { dg-options "-O2 -mavx512f -mno-mmx -Wno-psabi" } */ + +#include "avx512f-check.h" + +int do_main (void); + +static void +avx512f_test (void) +{ + do_main 
(); +} + +#undef main +#define main() do_main () + +#include "../../gcc.dg/pr63594-2.c" --- gcc/testsuite/gcc.target/i386/avx512f-vec-init.c.jj 2014-01-14 09:59:05.000000000 +0100 +++ gcc/testsuite/gcc.target/i386/avx512f-vec-init.c 2014-10-21 17:43:03.000000000 +0200 @@ -1,12 +1,12 @@ /* { dg-do compile } */ /* { dg-options "-O3 -mavx512f" } */ -/* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+%zmm" 2 } } */ -/* { dg-final { scan-assembler-times "vpbroadcastd" 1 } } */ -/* { dg-final { scan-assembler-times "vpbroadcastq" 1 } } */ -/* { dg-final { scan-assembler-times "vpbroadcastb" 2 } } */ -/* { dg-final { scan-assembler-times "vpbroadcastw" 2 } } */ -/* { dg-final { scan-assembler-times "vbroadcastss" 1 } } */ -/* { dg-final { scan-assembler-times "vbroadcastsd" 1 } } */ +/* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+%zmm" 0 } } */ +/* { dg-final { scan-assembler-times "vpbroadcastd" 2 } } */ +/* { dg-final { scan-assembler-times "vpbroadcastq" 2 } } */ +/* { dg-final { scan-assembler-times "vpbroadcastb" 3 } } */ +/* { dg-final { scan-assembler-times "vpbroadcastw" 3 } } */ +/* { dg-final { scan-assembler-times "vbroadcastss" 0 } } */ +/* { dg-final { scan-assembler-times "vbroadcastsd" 0 } } */ #include <x86intrin.h> Jakub