The attached, fairly trivial patch adds ASHIFTRT handling for TARGET_AVX512VL targets in the STV pass.
The patch also merges a couple of shift vector insn patterns. 2017-04-25 Uros Bizjak <ubiz...@gmail.com> PR target/70799 * config/i386/i386.c (dimode_scalar_to_vector_candidate_p): Handle ASHIFTRT. (dimode_scalar_chain::compute_convert_gain): Ditto. (dimode_scalar_chain::make_vector_copies): Ditto. (dimode_scalar_chain::convert_reg): Ditto. (dimode_scalar_chain::convert_insn): Ditto. * config/i386/sse.md (VI24_AVX512BW_1): Remove mode iterator. (VI248_AVX512BW_1): New mode iterator. (<mask_codefor>ashr<mode>3<mask_name>): Merge insn pattern with <mask_codefor>ashrv2di3<mask_name> insn using VI248_AVX512BW_1 mode iterator. testsuite/ChangeLog: 2017-04-25 Uros Bizjak <ubiz...@gmail.com> PR target/70799 * gcc.target/i386/pr70799-5.c: New test. Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}. My target doesn't test AVX512VL runtime, but considering how trivial the patch is, I don't expect problems with it. I did test for ICEs, though. Committed to mainline SVN. Uros.
Index: config/i386/i386.c =================================================================== --- config/i386/i386.c (revision 247249) +++ config/i386/i386.c (working copy) @@ -2809,6 +2809,11 @@ dimode_scalar_to_vector_candidate_p (rtx_insn *ins switch (GET_CODE (src)) { + case ASHIFTRT: + if (!TARGET_AVX512VL) + return false; + /* FALLTHRU */ + case ASHIFT: case LSHIFTRT: if (!REG_P (XEXP (src, 1)) @@ -3412,6 +3417,7 @@ dimode_scalar_chain::compute_convert_gain () else if (MEM_P (src) && REG_P (dst)) gain += 2 * ix86_cost->int_load[2] - ix86_cost->sse_load[1]; else if (GET_CODE (src) == ASHIFT + || GET_CODE (src) == ASHIFTRT || GET_CODE (src) == LSHIFTRT) { if (CONST_INT_P (XEXP (src, 0))) @@ -3560,6 +3566,7 @@ dimode_scalar_chain::make_vector_copies (unsigned rtx src = SET_SRC (def_set); if ((GET_CODE (src) == ASHIFT + || GET_CODE (src) == ASHIFTRT || GET_CODE (src) == LSHIFTRT) && !CONST_INT_P (XEXP (src, 1)) && reg_or_subregno (XEXP (src, 1)) == regno) @@ -3648,6 +3655,7 @@ dimode_scalar_chain::make_vector_copies (unsigned rtx src = SET_SRC (def_set); if ((GET_CODE (src) == ASHIFT + || GET_CODE (src) == ASHIFTRT || GET_CODE (src) == LSHIFTRT) && !CONST_INT_P (XEXP (src, 1)) && reg_or_subregno (XEXP (src, 1)) == regno) @@ -3758,6 +3766,7 @@ dimode_scalar_chain::convert_reg (unsigned regno) rtx dst = SET_DEST (def_set); if ((GET_CODE (src) == ASHIFT + || GET_CODE (src) == ASHIFTRT || GET_CODE (src) == LSHIFTRT) && !CONST_INT_P (XEXP (src, 1)) && reg_or_subregno (XEXP (src, 1)) == regno) @@ -3902,6 +3911,7 @@ dimode_scalar_chain::convert_insn (rtx_insn *insn) switch (GET_CODE (src)) { case ASHIFT: + case ASHIFTRT: case LSHIFTRT: convert_op (&XEXP (src, 0), insn); PUT_MODE (src, V2DImode); Index: config/i386/sse.md =================================================================== --- config/i386/sse.md (revision 247249) +++ config/i386/sse.md (working copy) @@ -413,9 +413,10 @@ (V4DI "TARGET_AVX512VL") V16SI V8DI]) ;; Suppose TARGET_AVX512VL as baseline 
-(define_mode_iterator VI24_AVX512BW_1 +(define_mode_iterator VI248_AVX512BW_1 [(V16HI "TARGET_AVX512BW") (V8HI "TARGET_AVX512BW") - V8SI V4SI]) + V8SI V4SI + V2DI]) (define_mode_iterator VI48_AVX512F [(V16SI "TARGET_AVX512F") V8SI V4SI @@ -10617,9 +10618,9 @@ }) (define_insn "<mask_codefor>ashr<mode>3<mask_name>" - [(set (match_operand:VI24_AVX512BW_1 0 "register_operand" "=v,v") - (ashiftrt:VI24_AVX512BW_1 - (match_operand:VI24_AVX512BW_1 1 "nonimmediate_operand" "v,vm") + [(set (match_operand:VI248_AVX512BW_1 0 "register_operand" "=v,v") + (ashiftrt:VI248_AVX512BW_1 + (match_operand:VI248_AVX512BW_1 1 "nonimmediate_operand" "v,vm") (match_operand:DI 2 "nonmemory_operand" "v,N")))] "TARGET_AVX512VL" "vpsra<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" @@ -10649,20 +10650,6 @@ (set_attr "prefix" "orig,vex") (set_attr "mode" "<sseinsnmode>")]) -(define_insn "<mask_codefor>ashrv2di3<mask_name>" - [(set (match_operand:V2DI 0 "register_operand" "=v,v") - (ashiftrt:V2DI - (match_operand:V2DI 1 "nonimmediate_operand" "v,vm") - (match_operand:DI 2 "nonmemory_operand" "v,N")))] - "TARGET_AVX512VL" - "vpsraq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" - [(set_attr "type" "sseishft") - (set (attr "length_immediate") - (if_then_else (match_operand 2 "const_int_operand") - (const_string "1") - (const_string "0"))) - (set_attr "mode" "TI")]) - (define_insn "ashr<mode>3<mask_name>" [(set (match_operand:VI248_AVX512BW_AVX512VL 0 "register_operand" "=v,v") (ashiftrt:VI248_AVX512BW_AVX512VL Index: testsuite/gcc.target/i386/pr70799-5.c =================================================================== --- testsuite/gcc.target/i386/pr70799-5.c (nonexistent) +++ testsuite/gcc.target/i386/pr70799-5.c (working copy) @@ -0,0 +1,17 @@ +/* PR target/pr70799 */ +/* { dg-do compile { target { ia32 } } } */ +/* { dg-options "-O2 -march=slm -mavx512vl -mno-stackrealign" } */ +/* { dg-final { scan-assembler "psllq" } } */ +/* { dg-final { scan-assembler 
"psraq" } } */ + +long long a, b; + +void test1 (int c) +{ + a = b << c; +} + +void test2 (int c) +{ + a = b >> c; +}