This patch changes the widen_[us]sum optabs into conversion optabs such that
targets can specify more than one conversion.

Following this patch are patches rewriting all targets using this change.

While working on this I noticed that the pattern misses some cases it
could handle if it made multiple attempts, e.g. if the promotion is from
qi to si and the target doesn't support this, it should try hi -> si.

But I'm leaving that for now.

Bootstrapped and regtested on aarch64-none-linux-gnu,
arm-none-linux-gnueabihf, x86_64-pc-linux-gnu
-m32, -m64 with no issues.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

        PR middle-end/122069
        * doc/md.texi (widen_ssum@var{n}@var{m}3, widen_usum@var{n}@var{m}3):
        Update docs.
        * optabs.cc (expand_widen_pattern_expr): Add WIDEN_SUM_EXPR as widening.
        * optabs.def (ssum_widen_optab, usum_widen_optab): Convert from direct
        to a conversion optab.
        * tree-vect-patterns.cc (vect_recog_widen_sum_pattern): Change
        vect_supportable_direct_optab_p into vect_supportable_conv_optab_p.

gcc/testsuite/ChangeLog:

        PR middle-end/122069
        * gcc.dg/vect/slp-reduc-3.c: vect_widen_sum_hi_to_si_pattern targets now
        pass.

---
diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index 
44e1149bea89b18903061713e8319d834b76adbf..97d21b90a650e5e5fad5cd72b01f30983ca4ab43
 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -5847,15 +5847,15 @@ equal or wider than the mode of the absolute 
difference. The result is placed
 in operand 0, which is of the same mode as operand 3.
 @var{m} is the mode of operand 1 and operand 2.
 
-@cindex @code{widen_ssum@var{m}3} instruction pattern
-@cindex @code{widen_usum@var{m}3} instruction pattern
-@item @samp{widen_ssum@var{m}3}
-@itemx @samp{widen_usum@var{m}3}
+@cindex @code{widen_ssum@var{n}@var{m}3} instruction pattern
+@cindex @code{widen_usum@var{n}@var{m}3} instruction pattern
+@item @samp{widen_ssum@var{n}@var{m}3}
+@itemx @samp{widen_usum@var{n}@var{m}3}
 Operands 0 and 2 are of the same mode, which is wider than the mode of
 operand 1. Add operand 1 to operand 2 and place the widened result in
 operand 0. (This is used express accumulation of elements into an accumulator
 of a wider mode.)
-@var{m} is the mode of operand 1.
+@var{m} is the mode of operand 1 and @var{n} is the mode of operand 0.
 
 @cindex @code{smulhs@var{m}3} instruction pattern
 @cindex @code{umulhs@var{m}3} instruction pattern
diff --git a/gcc/optabs.cc b/gcc/optabs.cc
index 
5c9450f61450fa4425d08339a1c2b5f7f5e654ec..0865fc2e19aeb2b3056c8634334d6c1644a3cc96
 100644
--- a/gcc/optabs.cc
+++ b/gcc/optabs.cc
@@ -322,6 +322,10 @@ expand_widen_pattern_expr (const_sepops ops, rtx op0, rtx 
op1, rtx wide_op,
     icode = find_widening_optab_handler (widen_pattern_optab,
                                         TYPE_MODE (TREE_TYPE (ops->op2)),
                                         tmode0);
+  else if (ops->code == WIDEN_SUM_EXPR)
+    icode = find_widening_optab_handler (widen_pattern_optab,
+                                        TYPE_MODE (TREE_TYPE (ops->op1)),
+                                        tmode0);
   else
     icode = optab_handler (widen_pattern_optab, tmode0);
   gcc_assert (icode != CODE_FOR_nothing);
diff --git a/gcc/optabs.def b/gcc/optabs.def
index 
790e43f08f476c8025dc2797f9ecaffe5b66acc5..e2ffb2b6423893b5dd757af1ed3f342ce8c9f76a
 100644
--- a/gcc/optabs.def
+++ b/gcc/optabs.def
@@ -85,6 +85,8 @@ OPTAB_CD(smsub_widen_optab, "msub$b$a4")
 OPTAB_CD(umsub_widen_optab, "umsub$b$a4")
 OPTAB_CD(ssmsub_widen_optab, "ssmsub$b$a4")
 OPTAB_CD(usmsub_widen_optab, "usmsub$a$b4")
+OPTAB_CD(ssum_widen_optab, "widen_ssum$I$a$b3")
+OPTAB_CD(usum_widen_optab, "widen_usum$I$a$b3")
 OPTAB_CD(crc_optab, "crc$a$b4")
 OPTAB_CD(crc_rev_optab, "crc_rev$a$b4")
 OPTAB_CD(vec_load_lanes_optab, "vec_load_lanes$a$b")
@@ -415,8 +417,6 @@ OPTAB_D (savg_floor_optab, "avg$a3_floor")
 OPTAB_D (uavg_floor_optab, "uavg$a3_floor")
 OPTAB_D (savg_ceil_optab, "avg$a3_ceil")
 OPTAB_D (uavg_ceil_optab, "uavg$a3_ceil")
-OPTAB_D (ssum_widen_optab, "widen_ssum$I$a3")
-OPTAB_D (usum_widen_optab, "widen_usum$I$a3")
 OPTAB_D (usad_optab, "usad$I$a")
 OPTAB_D (ssad_optab, "ssad$I$a")
 OPTAB_D (smulhs_optab, "smulhs$a3")
diff --git a/gcc/testsuite/gcc.dg/vect/slp-reduc-3.c 
b/gcc/testsuite/gcc.dg/vect/slp-reduc-3.c
index 
614d8ad17ca1629af9f43cedec3cbed197d9a582..b8aff98990b202eae2a7c367457113aa1b811eda
 100644
--- a/gcc/testsuite/gcc.dg/vect/slp-reduc-3.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-reduc-3.c
@@ -60,6 +60,6 @@ int main (void)
 /* The initialization loop in main also gets vectorized.  */
 /* { dg-final { scan-tree-dump-times "vect_recog_dot_prod_pattern: detected" 1 
"vect" { xfail *-*-* } } } */
 /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { 
vect_short_mult && { vect_widen_sum_hi_to_si  && vect_unpack } } } } } */ 
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { 
xfail { vect_widen_sum_hi_to_si_pattern || { ! { vect_short_mult && { 
vect_widen_sum_hi_to_si  && vect_unpack } } } } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { 
xfail { ! { vect_short_mult && { vect_widen_sum_hi_to_si  && vect_unpack } } } 
} } } */
 /* Check we can elide permutes if SLP vectorizing the reduction.  */
 /* { dg-final { scan-tree-dump-times " = VEC_PERM_EXPR" 0 "vect" { xfail { { { 
vect_widen_sum_hi_to_si_pattern || { ! vect_unpack } } && { ! vect_load_lanes } 
} && { vect_short_mult && { vect_widen_sum_hi_to_si  && vect_unpack } } } } } } 
*/
diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
index 
782327235db16384c2d71186911802daf7a15ebc..38695647f602792909c486ae52a3fbf8cc28b39e
 100644
--- a/gcc/tree-vect-patterns.cc
+++ b/gcc/tree-vect-patterns.cc
@@ -2544,8 +2544,8 @@ vect_recog_widen_sum_pattern (vec_info *vinfo,
 
   vect_pattern_detected ("vect_recog_widen_sum_pattern", last_stmt);
 
-  if (!vect_supportable_direct_optab_p (vinfo, type, WIDEN_SUM_EXPR,
-                                       unprom0.type, type_out))
+  if (!vect_supportable_conv_optab_p (vinfo, type, WIDEN_SUM_EXPR,
+                                     unprom0.type, type_out))
     return NULL;
 
   var = vect_recog_temp_ssa_var (type, NULL);


-- 
diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index 44e1149bea89b18903061713e8319d834b76adbf..97d21b90a650e5e5fad5cd72b01f30983ca4ab43 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -5847,15 +5847,15 @@ equal or wider than the mode of the absolute difference. The result is placed
 in operand 0, which is of the same mode as operand 3.
 @var{m} is the mode of operand 1 and operand 2.
 
-@cindex @code{widen_ssum@var{m}3} instruction pattern
-@cindex @code{widen_usum@var{m}3} instruction pattern
-@item @samp{widen_ssum@var{m}3}
-@itemx @samp{widen_usum@var{m}3}
+@cindex @code{widen_ssum@var{n}@var{m}3} instruction pattern
+@cindex @code{widen_usum@var{n}@var{m}3} instruction pattern
+@item @samp{widen_ssum@var{n}@var{m}3}
+@itemx @samp{widen_usum@var{n}@var{m}3}
 Operands 0 and 2 are of the same mode, which is wider than the mode of
 operand 1. Add operand 1 to operand 2 and place the widened result in
 operand 0. (This is used express accumulation of elements into an accumulator
 of a wider mode.)
-@var{m} is the mode of operand 1.
+@var{m} is the mode of operand 1 and @var{n} is the mode of operand 0.
 
 @cindex @code{smulhs@var{m}3} instruction pattern
 @cindex @code{umulhs@var{m}3} instruction pattern
diff --git a/gcc/optabs.cc b/gcc/optabs.cc
index 5c9450f61450fa4425d08339a1c2b5f7f5e654ec..0865fc2e19aeb2b3056c8634334d6c1644a3cc96 100644
--- a/gcc/optabs.cc
+++ b/gcc/optabs.cc
@@ -322,6 +322,10 @@ expand_widen_pattern_expr (const_sepops ops, rtx op0, rtx op1, rtx wide_op,
     icode = find_widening_optab_handler (widen_pattern_optab,
 					 TYPE_MODE (TREE_TYPE (ops->op2)),
 					 tmode0);
+  else if (ops->code == WIDEN_SUM_EXPR)
+    icode = find_widening_optab_handler (widen_pattern_optab,
+					 TYPE_MODE (TREE_TYPE (ops->op1)),
+					 tmode0);
   else
     icode = optab_handler (widen_pattern_optab, tmode0);
   gcc_assert (icode != CODE_FOR_nothing);
diff --git a/gcc/optabs.def b/gcc/optabs.def
index 790e43f08f476c8025dc2797f9ecaffe5b66acc5..e2ffb2b6423893b5dd757af1ed3f342ce8c9f76a 100644
--- a/gcc/optabs.def
+++ b/gcc/optabs.def
@@ -85,6 +85,8 @@ OPTAB_CD(smsub_widen_optab, "msub$b$a4")
 OPTAB_CD(umsub_widen_optab, "umsub$b$a4")
 OPTAB_CD(ssmsub_widen_optab, "ssmsub$b$a4")
 OPTAB_CD(usmsub_widen_optab, "usmsub$a$b4")
+OPTAB_CD(ssum_widen_optab, "widen_ssum$I$a$b3")
+OPTAB_CD(usum_widen_optab, "widen_usum$I$a$b3")
 OPTAB_CD(crc_optab, "crc$a$b4")
 OPTAB_CD(crc_rev_optab, "crc_rev$a$b4")
 OPTAB_CD(vec_load_lanes_optab, "vec_load_lanes$a$b")
@@ -415,8 +417,6 @@ OPTAB_D (savg_floor_optab, "avg$a3_floor")
 OPTAB_D (uavg_floor_optab, "uavg$a3_floor")
 OPTAB_D (savg_ceil_optab, "avg$a3_ceil")
 OPTAB_D (uavg_ceil_optab, "uavg$a3_ceil")
-OPTAB_D (ssum_widen_optab, "widen_ssum$I$a3")
-OPTAB_D (usum_widen_optab, "widen_usum$I$a3")
 OPTAB_D (usad_optab, "usad$I$a")
 OPTAB_D (ssad_optab, "ssad$I$a")
 OPTAB_D (smulhs_optab, "smulhs$a3")
diff --git a/gcc/testsuite/gcc.dg/vect/slp-reduc-3.c b/gcc/testsuite/gcc.dg/vect/slp-reduc-3.c
index 614d8ad17ca1629af9f43cedec3cbed197d9a582..b8aff98990b202eae2a7c367457113aa1b811eda 100644
--- a/gcc/testsuite/gcc.dg/vect/slp-reduc-3.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-reduc-3.c
@@ -60,6 +60,6 @@ int main (void)
 /* The initialization loop in main also gets vectorized.  */
 /* { dg-final { scan-tree-dump-times "vect_recog_dot_prod_pattern: detected" 1 "vect" { xfail *-*-* } } } */
 /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_short_mult && { vect_widen_sum_hi_to_si  && vect_unpack } } } } } */ 
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { xfail { vect_widen_sum_hi_to_si_pattern || { ! { vect_short_mult && { vect_widen_sum_hi_to_si  && vect_unpack } } } } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { xfail { ! { vect_short_mult && { vect_widen_sum_hi_to_si  && vect_unpack } } } } } } */
 /* Check we can elide permutes if SLP vectorizing the reduction.  */
 /* { dg-final { scan-tree-dump-times " = VEC_PERM_EXPR" 0 "vect" { xfail { { { vect_widen_sum_hi_to_si_pattern || { ! vect_unpack } } && { ! vect_load_lanes } } && { vect_short_mult && { vect_widen_sum_hi_to_si  && vect_unpack } } } } } } */
diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
index 782327235db16384c2d71186911802daf7a15ebc..38695647f602792909c486ae52a3fbf8cc28b39e 100644
--- a/gcc/tree-vect-patterns.cc
+++ b/gcc/tree-vect-patterns.cc
@@ -2544,8 +2544,8 @@ vect_recog_widen_sum_pattern (vec_info *vinfo,
 
   vect_pattern_detected ("vect_recog_widen_sum_pattern", last_stmt);
 
-  if (!vect_supportable_direct_optab_p (vinfo, type, WIDEN_SUM_EXPR,
-					unprom0.type, type_out))
+  if (!vect_supportable_conv_optab_p (vinfo, type, WIDEN_SUM_EXPR,
+				      unprom0.type, type_out))
     return NULL;
 
   var = vect_recog_temp_ssa_var (type, NULL);

Reply via email to