Hello, The patch at the bottom extends the insert insn patterns. It also removes some expands.
Bootstrapped. AVX-512* tests on top of patch-set all pass under simulator. Is it ok for trunk? gcc/ * config/i386/i386.c (CODE_FOR_avx2_extracti128): Rename to ... (CODE_FOR_avx_vextractf128v4di): this. (CODE_FOR_avx2_inserti128): Rename to ... (CODE_FOR_avx_vinsertf128v4di): this. (ix86_expand_args_builtin): Handle CODE_FOR_avx_vinsertf128v4di, CODE_FOR_avx_vextractf128v4di. (ix86_expand_args_builtin): Handle CODE_FOR_avx512dq_vinsertf32x8_mask, CODE_FOR_avx512dq_vinserti32x8_mask, CODE_FOR_avx512vl_vinsertv4df, CODE_FOR_avx512vl_vinsertv4di, CODE_FOR_avx512vl_vinsertv8sf, CODE_FOR_avx512vl_vinsertv8si. * config/i386/sse.md (define_expand "<extract_type>_vinsert<shuffletype><extract_suf>_mask"): Use AVX512_VEC. (define_insn "<mask_codefor><extract_type>_vinsert<shuffletype><extract_suf>_1<mask_name>"): Ditto. (define_expand "<extract_type_2>_vinsert<shuffletype><extract_suf_2>_mask"): Use AVX512_VEC_2. (define_insn "vec_set_lo_<mode><mask_name>"): New. (define_insn "vec_set_hi_<mode><mask_name>"): Ditto. (define_expand "avx512vl_vinsert<mode>"): Ditto. (define_insn "avx2_vec_set_lo_v4di"): Delete. (define_insn "avx2_vec_set_hi_v4di"): Ditto. (define_insn "vec_set_lo_<mode><mask_name>"): Add masking. (define_insn "vec_set_hi_<mode><mask_name>"): Ditto. (define_insn "vec_set_lo_<mode><mask_name>"): Ditto. (define_insn "vec_set_hi_<mode><mask_name>"): Ditto. (define_expand "avx2_extracti128"): Delete. (define_expand "avx2_inserti128"): Ditto. 
-- Thanks, K diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index ccfd47d..af61628 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -29965,8 +29965,8 @@ static const struct builtin_description bdesc_args[] = { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT }, { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT }, { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT }, - { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_extracti128, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT }, - { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_inserti128, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_avx_vextractf128v4di, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_avx_vinsertf128v4di, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT }, { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4di, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI }, { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv2di, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI }, { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv8si, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI }, @@ -34057,8 +34057,8 @@ ix86_expand_args_builtin (const struct builtin_description *d, if (!match) switch (icode) { - case CODE_FOR_avx2_inserti128: - case CODE_FOR_avx2_extracti128: + case CODE_FOR_avx_vinsertf128v4di: + case CODE_FOR_avx_vextractf128v4di: error ("the last argument must be 
an 1-bit immediate"); return const0_rtx; @@ -34124,6 +34124,12 @@ ix86_expand_args_builtin (const struct builtin_description *d, case CODE_FOR_avx512f_vinserti64x4_mask: case CODE_FOR_avx512f_vextractf64x4_mask: case CODE_FOR_avx512f_vextracti64x4_mask: + case CODE_FOR_avx512dq_vinsertf32x8_mask: + case CODE_FOR_avx512dq_vinserti32x8_mask: + case CODE_FOR_avx512vl_vinsertv4df: + case CODE_FOR_avx512vl_vinsertv4di: + case CODE_FOR_avx512vl_vinsertv8sf: + case CODE_FOR_avx512vl_vinsertv8si: error ("the last argument must be a 1-bit immediate"); return const0_rtx; diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index a7cc5ad..2dd79d0 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -11199,80 +11199,64 @@ (set_attr "prefix" "orig,orig,vex,vex") (set_attr "mode" "TI")]) -(define_expand "avx512f_vinsert<shuffletype>32x4_mask" - [(match_operand:V16FI 0 "register_operand") - (match_operand:V16FI 1 "register_operand") +(define_expand "<extract_type>_vinsert<shuffletype><extract_suf>_mask" + [(match_operand:AVX512_VEC 0 "register_operand") + (match_operand:AVX512_VEC 1 "register_operand") (match_operand:<ssequartermode> 2 "nonimmediate_operand") (match_operand:SI 3 "const_0_to_3_operand") - (match_operand:V16FI 4 "register_operand") + (match_operand:AVX512_VEC 4 "register_operand") (match_operand:<avx512fmaskmode> 5 "register_operand")] "TARGET_AVX512F" { - switch (INTVAL (operands[3])) - { - case 0: - emit_insn (gen_avx512f_vinsert<shuffletype>32x4_1_mask (operands[0], - operands[1], operands[2], GEN_INT (0xFFF), operands[4], - operands[5])); - break; - case 1: - emit_insn (gen_avx512f_vinsert<shuffletype>32x4_1_mask (operands[0], - operands[1], operands[2], GEN_INT (0xF0FF), operands[4], - operands[5])); - break; - case 2: - emit_insn (gen_avx512f_vinsert<shuffletype>32x4_1_mask (operands[0], - operands[1], operands[2], GEN_INT (0xFF0F), operands[4], - operands[5])); - break; - case 3: - emit_insn (gen_avx512f_vinsert<shuffletype>32x4_1_mask 
(operands[0], - operands[1], operands[2], GEN_INT (0xFFF0), operands[4], - operands[5])); - break; - default: - gcc_unreachable (); - } + int mask,selector; + mask = INTVAL (operands[3]); + selector = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) == 4 ? + 0xFFFF ^ (0xF000 >> mask * 4) + : 0xFF ^ (0xC0 >> mask * 2); + emit_insn (gen_<extract_type>_vinsert<shuffletype><extract_suf>_1_mask + (operands[0], operands[1], operands[2], GEN_INT (selector), + operands[4], operands[5])); DONE; - }) -(define_insn "<mask_codefor>avx512f_vinsert<shuffletype>32x4_1<mask_name>" - [(set (match_operand:V16FI 0 "register_operand" "=v") - (vec_merge:V16FI - (match_operand:V16FI 1 "register_operand" "v") - (vec_duplicate:V16FI +(define_insn "<mask_codefor><extract_type>_vinsert<shuffletype><extract_suf>_1<mask_name>" + [(set (match_operand:AVX512_VEC 0 "register_operand" "=v") + (vec_merge:AVX512_VEC + (match_operand:AVX512_VEC 1 "register_operand" "v") + (vec_duplicate:AVX512_VEC (match_operand:<ssequartermode> 2 "nonimmediate_operand" "vm")) (match_operand:SI 3 "const_int_operand" "n")))] "TARGET_AVX512F" { int mask; - if (INTVAL (operands[3]) == 0xFFF) - mask = 0; - else if ( INTVAL (operands[3]) == 0xF0FF) - mask = 1; - else if ( INTVAL (operands[3]) == 0xFF0F) - mask = 2; - else if ( INTVAL (operands[3]) == 0xFFF0) - mask = 3; + int selector = INTVAL (operands[3]); + + if (selector == 0xFFF || selector == 0x3F) + mask = 0; + else if ( selector == 0xF0FF || selector == 0xCF) + mask = 1; + else if ( selector == 0xFF0F || selector == 0xF3) + mask = 2; + else if ( selector == 0xFFF0 || selector == 0xFC) + mask = 3; else gcc_unreachable (); operands[3] = GEN_INT (mask); - return "vinsert<shuffletype>32x4\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}"; + return "vinsert<shuffletype><extract_suf>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}"; } [(set_attr "type" "sselog") (set_attr "length_immediate" "1") (set_attr "prefix" "evex") (set_attr "mode" 
"<sseinsnmode>")]) -(define_expand "avx512f_vinsert<shuffletype>64x4_mask" - [(match_operand:V8FI 0 "register_operand") - (match_operand:V8FI 1 "register_operand") +(define_expand "<extract_type_2>_vinsert<shuffletype><extract_suf_2>_mask" + [(match_operand:AVX512_VEC_2 0 "register_operand") + (match_operand:AVX512_VEC_2 1 "register_operand") (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand") (match_operand:SI 3 "const_0_to_1_operand") - (match_operand:V8FI 4 "register_operand") + (match_operand:AVX512_VEC_2 4 "register_operand") (match_operand:<avx512fmaskmode> 5 "register_operand")] "TARGET_AVX512F" { @@ -11289,6 +11273,40 @@ }) (define_insn "vec_set_lo_<mode><mask_name>" + [(set (match_operand:V16FI 0 "register_operand" "=v") + (vec_concat:V16FI + (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm") + (vec_select:<ssehalfvecmode> + (match_operand:V16FI 1 "register_operand" "v") + (parallel [(const_int 8) (const_int 9) + (const_int 10) (const_int 11) + (const_int 12) (const_int 13) + (const_int 14) (const_int 15)]))))] + "TARGET_AVX512DQ" + "vinsert<shuffletype>32x8\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, $0x0}" + [(set_attr "type" "sselog") + (set_attr "length_immediate" "1") + (set_attr "prefix" "evex") + (set_attr "mode" "<sseinsnmode>")]) + +(define_insn "vec_set_hi_<mode><mask_name>" + [(set (match_operand:V16FI 0 "register_operand" "=v") + (vec_concat:V16FI + (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm") + (vec_select:<ssehalfvecmode> + (match_operand:V16FI 1 "register_operand" "v") + (parallel [(const_int 0) (const_int 1) + (const_int 2) (const_int 3) + (const_int 4) (const_int 5) + (const_int 6) (const_int 7)]))))] + "TARGET_AVX512DQ" + "vinsert<shuffletype>32x8\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, $0x1}" + [(set_attr "type" "sselog") + (set_attr "length_immediate" "1") + (set_attr "prefix" "evex") + (set_attr "mode" "<sseinsnmode>")]) + +(define_insn 
"vec_set_lo_<mode><mask_name>" [(set (match_operand:V8FI 0 "register_operand" "=v") (vec_concat:V8FI (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm") @@ -16330,6 +16348,34 @@ (set_attr "length_immediate" "1") (set_attr "prefix" "orig,vex")]) +(define_expand "avx512vl_vinsert<mode>" + [(match_operand:VI48F_256 0 "register_operand") + (match_operand:VI48F_256 1 "register_operand") + (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand") + (match_operand:SI 3 "const_0_to_1_operand") + (match_operand:VI48F_256 4 "register_operand") + (match_operand:<avx512fmaskmode> 5 "register_operand")] + "TARGET_AVX512VL" +{ + rtx (*insn)(rtx, rtx, rtx, rtx, rtx); + + switch (INTVAL (operands[3])) + { + case 0: + insn = gen_vec_set_lo_<mode>_mask; + break; + case 1: + insn = gen_vec_set_hi_<mode>_mask; + break; + default: + gcc_unreachable (); + } + + emit_insn (insn (operands[0], operands[1], operands[2], operands[4], + operands[5])); + DONE; +}) + (define_expand "avx_vinsertf128<mode>" [(match_operand:V_256 0 "register_operand") (match_operand:V_256 1 "register_operand") @@ -16355,92 +16401,82 @@ DONE; }) -(define_insn "avx2_vec_set_lo_v4di" - [(set (match_operand:V4DI 0 "register_operand" "=x") - (vec_concat:V4DI - (match_operand:V2DI 2 "nonimmediate_operand" "xm") - (vec_select:V2DI - (match_operand:V4DI 1 "register_operand" "x") - (parallel [(const_int 2) (const_int 3)]))))] - "TARGET_AVX2" - "vinserti128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}" - [(set_attr "type" "sselog") - (set_attr "prefix_extra" "1") - (set_attr "length_immediate" "1") - (set_attr "prefix" "vex") - (set_attr "mode" "OI")]) - -(define_insn "avx2_vec_set_hi_v4di" - [(set (match_operand:V4DI 0 "register_operand" "=x") - (vec_concat:V4DI - (vec_select:V2DI - (match_operand:V4DI 1 "register_operand" "x") - (parallel [(const_int 0) (const_int 1)])) - (match_operand:V2DI 2 "nonimmediate_operand" "xm")))] - "TARGET_AVX2" - "vinserti128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}" - [(set_attr "type" 
"sselog") - (set_attr "prefix_extra" "1") - (set_attr "length_immediate" "1") - (set_attr "prefix" "vex") - (set_attr "mode" "OI")]) - -(define_insn "vec_set_lo_<mode>" - [(set (match_operand:VI8F_256 0 "register_operand" "=x") +(define_insn "vec_set_lo_<mode><mask_name>" + [(set (match_operand:VI8F_256 0 "register_operand" "=v") (vec_concat:VI8F_256 - (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm") + (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm") (vec_select:<ssehalfvecmode> - (match_operand:VI8F_256 1 "register_operand" "x") + (match_operand:VI8F_256 1 "register_operand" "v") (parallel [(const_int 2) (const_int 3)]))))] "TARGET_AVX" - "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}" +{ + if (TARGET_AVX512VL) + return "vinsert<shuffletype>64x2\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}"; + else + return "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"; +} [(set_attr "type" "sselog") (set_attr "prefix_extra" "1") (set_attr "length_immediate" "1") (set_attr "prefix" "vex") (set_attr "mode" "<sseinsnmode>")]) -(define_insn "vec_set_hi_<mode>" - [(set (match_operand:VI8F_256 0 "register_operand" "=x") +(define_insn "vec_set_hi_<mode><mask_name>" + [(set (match_operand:VI8F_256 0 "register_operand" "=v") (vec_concat:VI8F_256 (vec_select:<ssehalfvecmode> - (match_operand:VI8F_256 1 "register_operand" "x") + (match_operand:VI8F_256 1 "register_operand" "v") (parallel [(const_int 0) (const_int 1)])) - (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")))] + (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))] "TARGET_AVX" - "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}" +{ + if (TARGET_AVX512VL) + return "vinsert<shuffletype>64x2\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}"; + else + return "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"; +} [(set_attr "type" "sselog") (set_attr "prefix_extra" "1") (set_attr "length_immediate" "1") (set_attr "prefix" "vex") 
(set_attr "mode" "<sseinsnmode>")]) -(define_insn "vec_set_lo_<mode>" - [(set (match_operand:VI4F_256 0 "register_operand" "=x") +(define_insn "vec_set_lo_<mode><mask_name>" + [(set (match_operand:VI4F_256 0 "register_operand" "=v") (vec_concat:VI4F_256 - (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm") + (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm") (vec_select:<ssehalfvecmode> - (match_operand:VI4F_256 1 "register_operand" "x") + (match_operand:VI4F_256 1 "register_operand" "v") (parallel [(const_int 4) (const_int 5) (const_int 6) (const_int 7)]))))] "TARGET_AVX" - "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}" +{ + if (TARGET_AVX512VL) + return "vinsert<shuffletype>32x4\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}"; + else + return "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"; +} [(set_attr "type" "sselog") (set_attr "prefix_extra" "1") (set_attr "length_immediate" "1") (set_attr "prefix" "vex") (set_attr "mode" "<sseinsnmode>")]) -(define_insn "vec_set_hi_<mode>" - [(set (match_operand:VI4F_256 0 "register_operand" "=x") +(define_insn "vec_set_hi_<mode><mask_name>" + [(set (match_operand:VI4F_256 0 "register_operand" "=v") (vec_concat:VI4F_256 (vec_select:<ssehalfvecmode> - (match_operand:VI4F_256 1 "register_operand" "x") + (match_operand:VI4F_256 1 "register_operand" "v") (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)])) - (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")))] + (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))] "TARGET_AVX" - "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}" +{ + if (TARGET_AVX512VL) + return "vinsert<shuffletype>32x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}"; + else + return "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"; +} [(set_attr "type" "sselog") (set_attr "prefix_extra" "1") (set_attr "length_immediate" "1") @@ -16611,55 +16647,6 @@ DONE; }) -(define_expand "avx2_extracti128" - 
[(match_operand:V2DI 0 "nonimmediate_operand") - (match_operand:V4DI 1 "register_operand") - (match_operand:SI 2 "const_0_to_1_operand")] - "TARGET_AVX2" -{ - rtx (*insn)(rtx, rtx); - - switch (INTVAL (operands[2])) - { - case 0: - insn = gen_vec_extract_lo_v4di; - break; - case 1: - insn = gen_vec_extract_hi_v4di; - break; - default: - gcc_unreachable (); - } - - emit_insn (insn (operands[0], operands[1])); - DONE; -}) - -(define_expand "avx2_inserti128" - [(match_operand:V4DI 0 "register_operand") - (match_operand:V4DI 1 "register_operand") - (match_operand:V2DI 2 "nonimmediate_operand") - (match_operand:SI 3 "const_0_to_1_operand")] - "TARGET_AVX2" -{ - rtx (*insn)(rtx, rtx, rtx); - - switch (INTVAL (operands[3])) - { - case 0: - insn = gen_avx2_vec_set_lo_v4di; - break; - case 1: - insn = gen_avx2_vec_set_hi_v4di; - break; - default: - gcc_unreachable (); - } - - emit_insn (insn (operands[0], operands[1], operands[2])); - DONE; -}) - (define_insn "<avx2_avx512>_ashrv<mode><mask_name>" [(set (match_operand:VI48_AVX512F_AVX512VL 0 "register_operand" "=v") (ashiftrt:VI48_AVX512F_AVX512VL