10 Regression] SIMD not generated for horizontal sum of bytes in array

jakub at gcc dot gnu.org Tue, 30 Jul 2019 03:18:55 -0700

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=91201


--- Comment #7 from Jakub Jelinek <jakub at gcc dot gnu.org> ---
Untested patch to add the reduc_plus_scal_v{16,32,64}qi expanders.
Wonder if we don't need also reduc_plus_scal_v8qi expander for
TARGET_MMX_WITH_SSE.

--- gcc/config/i386/sse.md.jj   2019-07-28 17:29:41.488143221 +0200
+++ gcc/config/i386/sse.md      2019-07-30 12:05:34.249034097 +0200
@@ -2728,9 +2728,30 @@ (define_expand "reduc_plus_scal_<mode>"
   DONE;
 })

+(define_expand "reduc_plus_scal_v16qi"
+ [(plus:V16QI
+    (match_operand:QI 0 "register_operand")
+    (match_operand:V16QI 1 "register_operand"))]
+ "TARGET_SSE2"
+{
+  rtx tmp = gen_reg_rtx (V1TImode);
+  emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, operands[1]),
+                                GEN_INT (64)));
+  rtx tmp2 = gen_reg_rtx (V16QImode);
+  emit_insn (gen_addv16qi3 (tmp2, operands[1], gen_lowpart (V16QImode, tmp)));
+  rtx tmp3 = gen_reg_rtx (V16QImode);
+  emit_move_insn (tmp3, CONST0_RTX (V16QImode));
+  rtx tmp4 = gen_reg_rtx (V2DImode);
+  emit_insn (gen_sse2_psadbw (tmp4, tmp2, tmp3));
+  tmp4 = gen_lowpart (V16QImode, tmp4);
+  emit_insn (gen_vec_extractv16qiqi (operands[0], tmp4, const0_rtx));
+  DONE;
+})
+
 (define_mode_iterator REDUC_PLUS_MODE
  [(V4DF "TARGET_AVX") (V8SF "TARGET_AVX")
-  (V8DF "TARGET_AVX512F") (V16SF "TARGET_AVX512F")])
+  (V8DF "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
+  (V32QI "TARGET_AVX") (V64QI "TARGET_AVX512F")])

 (define_expand "reduc_plus_scal_<mode>"
  [(plus:REDUC_PLUS_MODE
@@ -2741,8 +2762,8 @@ (define_expand "reduc_plus_scal_<mode>"
   rtx tmp = gen_reg_rtx (<ssehalfvecmode>mode);
   emit_insn (gen_vec_extract_hi_<mode> (tmp, operands[1]));
   rtx tmp2 = gen_reg_rtx (<ssehalfvecmode>mode);
-  emit_insn (gen_add<ssehalfvecmodelower>3
-    (tmp2, tmp, gen_lowpart (<ssehalfvecmode>mode, operands[1])));
+  rtx tmp3 = gen_lowpart (<ssehalfvecmode>mode, operands[1]);
+  emit_insn (gen_add<ssehalfvecmodelower>3 (tmp2, tmp, tmp3));
   emit_insn (gen_reduc_plus_scal_<ssehalfvecmodelower> (operands[0], tmp2));
   DONE;
 })

[Bug tree-optimization/91201] [7/8/9/10 Regression] SIMD not generated for horizontal sum of bytes in array

Reply via email to