The following fixes a missed check in vectorizable_reduction. We cannot handle a lane-reducing reduction operation such as DOT_PROD_EXPR when it does not use a single def-use cycle, because in that case we would need individual reduction vector elements in other vector stmts.
Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk. Richard. 2017-06-18 Richard Biener <rguent...@suse.de> PR tree-optimization/81418 * tree-vect-loop.c (vectorizable_reduction): Properly compute vectype_in. Verify that with lane-reducing reduction operations we have a single def-use cycle. * gcc.dg/torture/pr81418.c: New testcase. Index: gcc/tree-vect-loop.c =================================================================== --- gcc/tree-vect-loop.c (revision 250270) +++ gcc/tree-vect-loop.c (working copy) @@ -5642,7 +5642,10 @@ vectorizable_reduction (gimple *stmt, gi if (k == 1 && gimple_assign_rhs_code (reduc_stmt) == COND_EXPR) continue; - vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op)); + tem = get_vectype_for_scalar_type (TREE_TYPE (op)); + if (! vectype_in + || TYPE_VECTOR_SUBPARTS (tem) < TYPE_VECTOR_SUBPARTS (vectype_in)) + vectype_in = tem; break; } gcc_assert (vectype_in); @@ -6213,26 +6216,6 @@ vectorizable_reduction (gimple *stmt, gi } } - if (!vec_stmt) /* transformation not required. */ - { - if (first_p) - vect_model_reduction_cost (stmt_info, epilog_reduc_code, ncopies); - STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type; - return true; - } - - /* Transform. */ - - if (dump_enabled_p ()) - dump_printf_loc (MSG_NOTE, vect_location, "transform reduction.\n"); - - /* FORNOW: Multiple types are not supported for condition. */ - if (code == COND_EXPR) - gcc_assert (ncopies == 1); - - /* Create the destination vector */ - vec_dest = vect_create_destination_var (scalar_dest, vectype_out); - /* In case the vectorization factor (VF) is bigger than the number of elements that we can fit in a vectype (nunits), we have to generate more than one vector stmt - i.e - we need to "unroll" the @@ -6276,6 +6259,41 @@ vectorizable_reduction (gimple *stmt, gi else epilog_copies = ncopies; + /* If the reduction stmt is one of the patterns that have lane + reduction embedded we cannot handle the case of ! single_defuse_cycle. 
*/ + if ((ncopies > 1 + && ! single_defuse_cycle) + && (code == DOT_PROD_EXPR + || code == WIDEN_SUM_EXPR + || code == SAD_EXPR)) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "multi def-use cycle not possible for lane-reducing " + "reduction operation\n"); + return false; + } + + if (!vec_stmt) /* transformation not required. */ + { + if (first_p) + vect_model_reduction_cost (stmt_info, epilog_reduc_code, ncopies); + STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type; + return true; + } + + /* Transform. */ + + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, "transform reduction.\n"); + + /* FORNOW: Multiple types are not supported for condition. */ + if (code == COND_EXPR) + gcc_assert (ncopies == 1); + + /* Create the destination vector */ + vec_dest = vect_create_destination_var (scalar_dest, vectype_out); + prev_stmt_info = NULL; prev_phi_info = NULL; if (slp_node) Index: gcc/testsuite/gcc.dg/torture/pr81418.c =================================================================== --- gcc/testsuite/gcc.dg/torture/pr81418.c (nonexistent) +++ gcc/testsuite/gcc.dg/torture/pr81418.c (working copy) @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-ftree-loop-optimize" } */ + +int +ol (int ku) +{ + int zq = 0; + + while (ku < 1) + { + int y6; + + for (y6 = 0; y6 < 3; ++y6) + zq += (char)ku; + ++ku; + } + + return zq; +}