https://gcc.gnu.org/bugzilla/show_bug.cgi?id=91201
--- Comment #7 from Jakub Jelinek <jakub at gcc dot gnu.org> ---
Untested patch to add the reduc_plus_scal_v{16,32,64}qi expanders.
I wonder whether we also need a reduc_plus_scal_v8qi expander for
TARGET_MMX_WITH_SSE.

--- gcc/config/i386/sse.md.jj 2019-07-28 17:29:41.488143221 +0200
+++ gcc/config/i386/sse.md 2019-07-30 12:05:34.249034097 +0200
@@ -2728,9 +2728,30 @@ (define_expand "reduc_plus_scal_<mode>"
   DONE;
 })
 
+(define_expand "reduc_plus_scal_v16qi"
+ [(plus:V16QI
+    (match_operand:QI 0 "register_operand")
+    (match_operand:V16QI 1 "register_operand"))]
+ "TARGET_SSE2"
+{
+  rtx tmp = gen_reg_rtx (V1TImode);
+  emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, operands[1]),
+                                 GEN_INT (64)));
+  rtx tmp2 = gen_reg_rtx (V16QImode);
+  emit_insn (gen_addv16qi3 (tmp2, operands[1], gen_lowpart (V16QImode, tmp)));
+  rtx tmp3 = gen_reg_rtx (V16QImode);
+  emit_move_insn (tmp3, CONST0_RTX (V16QImode));
+  rtx tmp4 = gen_reg_rtx (V2DImode);
+  emit_insn (gen_sse2_psadbw (tmp4, tmp2, tmp3));
+  tmp4 = gen_lowpart (V16QImode, tmp4);
+  emit_insn (gen_vec_extractv16qiqi (operands[0], tmp4, const0_rtx));
+  DONE;
+})
+
 (define_mode_iterator REDUC_PLUS_MODE
   [(V4DF "TARGET_AVX") (V8SF "TARGET_AVX")
-   (V8DF "TARGET_AVX512F") (V16SF "TARGET_AVX512F")])
+   (V8DF "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
+   (V32QI "TARGET_AVX") (V64QI "TARGET_AVX512F")])
 
 (define_expand "reduc_plus_scal_<mode>"
   [(plus:REDUC_PLUS_MODE
@@ -2741,8 +2762,8 @@ (define_expand "reduc_plus_scal_<mode>"
   rtx tmp = gen_reg_rtx (<ssehalfvecmode>mode);
   emit_insn (gen_vec_extract_hi_<mode> (tmp, operands[1]));
   rtx tmp2 = gen_reg_rtx (<ssehalfvecmode>mode);
-  emit_insn (gen_add<ssehalfvecmodelower>3
-    (tmp2, tmp, gen_lowpart (<ssehalfvecmode>mode, operands[1])));
+  rtx tmp3 = gen_lowpart (<ssehalfvecmode>mode, operands[1]);
+  emit_insn (gen_add<ssehalfvecmodelower>3 (tmp2, tmp, tmp3));
   emit_insn (gen_reduc_plus_scal_<ssehalfvecmodelower> (operands[0], tmp2));
   DONE;
 })