On Fri, Mar 3, 2017 at 8:42 PM, Jakub Jelinek <ja...@redhat.com> wrote: > Hi! > > vpermq/vpermpd instructions for 512-bit vectors use bogus RTL and if > simplify-rtx.c happens to simplify it, we ICE. > The problem is that for V8D[IF]mode VEC_SELECT we need to use a PARALLEL > with 8 elements, not 4. > The <avx512>_vec_dup<mode>_1 change is unrelated to this, spotted > first by manual inspection and verified by the genrecog.c verify_pattern > patch; the broadcast wants to broadcast the first element, so it should be > a scalar vec_select in vec_duplicate, rather than same size vector as > vec_select's operand. > > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? > > 2017-03-03 Jakub Jelinek <ja...@redhat.com> > > PR target/79812 > * config/i386/sse.md (VI8F_256_512): Remove mode iterator. > (<avx2_avx512>_perm<mode>): Rename to ... > (avx2_perm<mode>): ... this. Use VI8F_256 iterator instead > of VI8F_256_512. > (<avx512>_perm<mode>_mask): Rename to ... > (avx512vl_perm<mode>_mask): ... this. Use VI8F_256 iterator instead > of VI8F_256_512. > (<avx2_avx512>_perm<mode>_1<mask_name>): Rename to ... > (avx2_perm<mode>_1<mask_name>): ... this. Use VI8F_256 iterator > instead of VI8F_256_512. > (avx512f_perm<mode>): New define_expand. > (avx512f_perm<mode>_mask): Likewise. > (avx512f_perm<mode>_1<mask_name>): New define_insn. > (<avx512>_vec_dup<mode>_1): Fix up vec_select mode. > > * gcc.target/i386/avx512f-vpermq-imm-3.c: New test.
LGTM. Thanks, Uros. > --- gcc/config/i386/sse.md.jj 2017-03-02 10:19:07.000000000 +0100 > +++ gcc/config/i386/sse.md 2017-03-03 16:10:42.317111636 +0100 > @@ -549,8 +549,6 @@ (define_mode_iterator VI4F_128 [V4SI V4S > (define_mode_iterator VI8F_128 [V2DI V2DF]) > (define_mode_iterator VI4F_256 [V8SI V8SF]) > (define_mode_iterator VI8F_256 [V4DI V4DF]) > -(define_mode_iterator VI8F_256_512 > - [V4DI V4DF (V8DI "TARGET_AVX512F") (V8DF "TARGET_AVX512F")]) > (define_mode_iterator VI48F_256_512 > [V8SI V8SF > (V16SI "TARGET_AVX512F") (V16SF "TARGET_AVX512F") > @@ -17306,43 +17304,43 @@ (define_insn "<avx512>_permvar<mode><mas > (set_attr "prefix" "<mask_prefix2>") > (set_attr "mode" "<sseinsnmode>")]) > > -(define_expand "<avx2_avx512>_perm<mode>" > - [(match_operand:VI8F_256_512 0 "register_operand") > - (match_operand:VI8F_256_512 1 "nonimmediate_operand") > +(define_expand "avx2_perm<mode>" > + [(match_operand:VI8F_256 0 "register_operand") > + (match_operand:VI8F_256 1 "nonimmediate_operand") > (match_operand:SI 2 "const_0_to_255_operand")] > "TARGET_AVX2" > { > int mask = INTVAL (operands[2]); > - emit_insn (gen_<avx2_avx512>_perm<mode>_1 (operands[0], operands[1], > - GEN_INT ((mask >> 0) & 3), > - GEN_INT ((mask >> 2) & 3), > - GEN_INT ((mask >> 4) & 3), > - GEN_INT ((mask >> 6) & 3))); > + emit_insn (gen_avx2_perm<mode>_1 (operands[0], operands[1], > + GEN_INT ((mask >> 0) & 3), > + GEN_INT ((mask >> 2) & 3), > + GEN_INT ((mask >> 4) & 3), > + GEN_INT ((mask >> 6) & 3))); > DONE; > }) > > -(define_expand "<avx512>_perm<mode>_mask" > - [(match_operand:VI8F_256_512 0 "register_operand") > - (match_operand:VI8F_256_512 1 "nonimmediate_operand") > +(define_expand "avx512vl_perm<mode>_mask" > + [(match_operand:VI8F_256 0 "register_operand") > + (match_operand:VI8F_256 1 "nonimmediate_operand") > (match_operand:SI 2 "const_0_to_255_operand") > - (match_operand:VI8F_256_512 3 "vector_move_operand") > + (match_operand:VI8F_256 3 "vector_move_operand") > 
(match_operand:<avx512fmaskmode> 4 "register_operand")] > - "TARGET_AVX512F" > + "TARGET_AVX512VL" > { > int mask = INTVAL (operands[2]); > emit_insn (gen_<avx2_avx512>_perm<mode>_1_mask (operands[0], operands[1], > - GEN_INT ((mask >> 0) & 3), > - GEN_INT ((mask >> 2) & 3), > - GEN_INT ((mask >> 4) & 3), > - GEN_INT ((mask >> 6) & 3), > - operands[3], operands[4])); > + GEN_INT ((mask >> 0) & 3), > + GEN_INT ((mask >> 2) & 3), > + GEN_INT ((mask >> 4) & 3), > + GEN_INT ((mask >> 6) & 3), > + operands[3], operands[4])); > DONE; > }) > > -(define_insn "<avx2_avx512>_perm<mode>_1<mask_name>" > - [(set (match_operand:VI8F_256_512 0 "register_operand" "=v") > - (vec_select:VI8F_256_512 > - (match_operand:VI8F_256_512 1 "nonimmediate_operand" "vm") > +(define_insn "avx2_perm<mode>_1<mask_name>" > + [(set (match_operand:VI8F_256 0 "register_operand" "=v") > + (vec_select:VI8F_256 > + (match_operand:VI8F_256 1 "nonimmediate_operand" "vm") > (parallel [(match_operand 2 "const_0_to_3_operand") > (match_operand 3 "const_0_to_3_operand") > (match_operand 4 "const_0_to_3_operand") > @@ -17361,6 +17359,77 @@ (define_insn "<avx2_avx512>_perm<mode>_1 > (set_attr "prefix" "<mask_prefix2>") > (set_attr "mode" "<sseinsnmode>")]) > > +(define_expand "avx512f_perm<mode>" > + [(match_operand:V8FI 0 "register_operand") > + (match_operand:V8FI 1 "nonimmediate_operand") > + (match_operand:SI 2 "const_0_to_255_operand")] > + "TARGET_AVX512F" > +{ > + int mask = INTVAL (operands[2]); > + emit_insn (gen_avx512f_perm<mode>_1 (operands[0], operands[1], > + GEN_INT ((mask >> 0) & 3), > + GEN_INT ((mask >> 2) & 3), > + GEN_INT ((mask >> 4) & 3), > + GEN_INT ((mask >> 6) & 3), > + GEN_INT (((mask >> 0) & 3) + 4), > + GEN_INT (((mask >> 2) & 3) + 4), > + GEN_INT (((mask >> 4) & 3) + 4), > + GEN_INT (((mask >> 6) & 3) + 4))); > + DONE; > +}) > + > +(define_expand "avx512f_perm<mode>_mask" > + [(match_operand:V8FI 0 "register_operand") > + (match_operand:V8FI 1 "nonimmediate_operand") > + 
(match_operand:SI 2 "const_0_to_255_operand") > + (match_operand:V8FI 3 "vector_move_operand") > + (match_operand:<avx512fmaskmode> 4 "register_operand")] > + "TARGET_AVX512F" > +{ > + int mask = INTVAL (operands[2]); > + emit_insn (gen_avx512f_perm<mode>_1_mask (operands[0], operands[1], > + GEN_INT ((mask >> 0) & 3), > + GEN_INT ((mask >> 2) & 3), > + GEN_INT ((mask >> 4) & 3), > + GEN_INT ((mask >> 6) & 3), > + GEN_INT (((mask >> 0) & 3) + 4), > + GEN_INT (((mask >> 2) & 3) + 4), > + GEN_INT (((mask >> 4) & 3) + 4), > + GEN_INT (((mask >> 6) & 3) + 4), > + operands[3], operands[4])); > + DONE; > +}) > + > +(define_insn "avx512f_perm<mode>_1<mask_name>" > + [(set (match_operand:V8FI 0 "register_operand" "=v") > + (vec_select:V8FI > + (match_operand:V8FI 1 "nonimmediate_operand" "vm") > + (parallel [(match_operand 2 "const_0_to_3_operand") > + (match_operand 3 "const_0_to_3_operand") > + (match_operand 4 "const_0_to_3_operand") > + (match_operand 5 "const_0_to_3_operand") > + (match_operand 6 "const_4_to_7_operand") > + (match_operand 7 "const_4_to_7_operand") > + (match_operand 8 "const_4_to_7_operand") > + (match_operand 9 "const_4_to_7_operand")])))] > + "TARGET_AVX512F && <mask_mode512bit_condition> > + && (INTVAL (operands[2]) == (INTVAL (operands[6]) - 4) > + && INTVAL (operands[3]) == (INTVAL (operands[7]) - 4) > + && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4) > + && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4))" > +{ > + int mask = 0; > + mask |= INTVAL (operands[2]) << 0; > + mask |= INTVAL (operands[3]) << 2; > + mask |= INTVAL (operands[4]) << 4; > + mask |= INTVAL (operands[5]) << 6; > + operands[2] = GEN_INT (mask); > + return "vperm<ssemodesuffix>\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}"; > +} > + [(set_attr "type" "sselog") > + (set_attr "prefix" "<mask_prefix2>") > + (set_attr "mode" "<sseinsnmode>")]) > + > (define_insn "avx2_permv2ti" > [(set (match_operand:V4DI 0 "register_operand" "=x") > (unspec:V4DI > @@ 
-17389,7 +17458,7 @@ (define_insn "avx2_vec_dupv4df" > (define_insn "<avx512>_vec_dup<mode>_1" > [(set (match_operand:VI_AVX512BW 0 "register_operand" "=v,v") > (vec_duplicate:VI_AVX512BW > - (vec_select:VI_AVX512BW > + (vec_select:<ssescalarmode> > (match_operand:VI_AVX512BW 1 "nonimmediate_operand" "v,m") > (parallel [(const_int 0)]))))] > "TARGET_AVX512F" > --- gcc/testsuite/gcc.target/i386/avx512f-vpermq-imm-3.c.jj 2017-03-03 16:13:19.852037848 +0100 > +++ gcc/testsuite/gcc.target/i386/avx512f-vpermq-imm-3.c 2017-03-03 16:14:14.952312508 +0100 > @@ -0,0 +1,5 @@ > +/* PR target/79812 */ > +/* { dg-do compile } */ > +/* { dg-options "-O3 -mavx512f" } */ > + > +#include "avx512f-vpermq-imm-2.c" > > Jakub