This applies some refactoring to vect_build_slp_tree to make a fix for PR66856 easier (where I need to add some reference counting code to stmts in SLP tree). Doing this w/o refactoring turned out to be a "bit" unwieldly.
Bootstrapped and tested on x86_64-unknown-linux-gnu, applied. Richard. 2016-01-14 Richard Biener <rguent...@suse.de> PR tree-optimization/66856 * tree-vect-slp.c (vect_build_slp_tree): Refactor to build SLP node only if it built successfully. (vect_analyze_slp_instance): Adjust. Index: gcc/tree-vect-slp.c =================================================================== *** gcc/tree-vect-slp.c (revision 232315) --- gcc/tree-vect-slp.c (working copy) *************** vect_build_slp_tree_1 (vec_info *vinfo, *** 834,853 **** The value returned is the depth in the SLP tree where a mismatch was found. */ ! static bool vect_build_slp_tree (vec_info *vinfo, ! slp_tree *node, unsigned int group_size, unsigned int *max_nunits, vec<slp_tree> *loads, bool *matches, unsigned *npermutes, unsigned *tree_size, unsigned max_tree_size) { ! unsigned nops, i, this_tree_size = 0; gimple *stmt; matches[0] = false; ! stmt = SLP_TREE_SCALAR_STMTS (*node)[0]; if (is_gimple_call (stmt)) nops = gimple_call_num_args (stmt); else if (is_gimple_assign (stmt)) --- 834,854 ---- The value returned is the depth in the SLP tree where a mismatch was found. */ ! static slp_tree vect_build_slp_tree (vec_info *vinfo, ! vec<gimple *> stmts, unsigned int group_size, unsigned int *max_nunits, vec<slp_tree> *loads, bool *matches, unsigned *npermutes, unsigned *tree_size, unsigned max_tree_size) { ! unsigned nops, i, this_tree_size = 0, this_max_nunits = *max_nunits; gimple *stmt; + slp_tree node; matches[0] = false; ! stmt = stmts[0]; if (is_gimple_call (stmt)) nops = gimple_call_num_args (stmt); else if (is_gimple_assign (stmt)) *************** vect_build_slp_tree (vec_info *vinfo, *** 857,883 **** nops++; } else ! return false; bool two_operators = false; if (!vect_build_slp_tree_1 (vinfo, ! SLP_TREE_SCALAR_STMTS (*node), group_size, nops, ! max_nunits, matches, &two_operators)) ! return false; ! SLP_TREE_TWO_OPERATORS (*node) = two_operators; /* If the SLP node is a load, terminate the recursion. */ if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)) && DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)))) { ! loads->safe_push (*node); ! return true; } /* Get at the operands, verifying they are compatible. */ vec<slp_oprnd_info> oprnds_info = vect_create_oprnd_info (nops, group_size); slp_oprnd_info oprnd_info; ! FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (*node), i, stmt) { switch (vect_get_and_check_slp_defs (vinfo, stmt, i, &oprnds_info)) { --- 858,885 ---- nops++; } else ! return NULL; bool two_operators = false; if (!vect_build_slp_tree_1 (vinfo, ! stmts, group_size, nops, ! &this_max_nunits, matches, &two_operators)) ! return NULL; /* If the SLP node is a load, terminate the recursion. */ if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)) && DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)))) { ! *max_nunits = this_max_nunits; ! node = vect_create_new_slp_node (stmts); ! loads->safe_push (node); ! return node; } /* Get at the operands, verifying they are compatible. */ vec<slp_oprnd_info> oprnds_info = vect_create_oprnd_info (nops, group_size); slp_oprnd_info oprnd_info; ! FOR_EACH_VEC_ELT (stmts, i, stmt) { switch (vect_get_and_check_slp_defs (vinfo, stmt, i, &oprnds_info)) { *************** vect_build_slp_tree (vec_info *vinfo, *** 886,892 **** case -1: matches[0] = false; vect_free_oprnd_info (oprnds_info); ! return false; case 1: matches[i] = false; break; --- 888,894 ---- case -1: matches[0] = false; vect_free_oprnd_info (oprnds_info); ! return NULL; case 1: matches[i] = false; break; *************** vect_build_slp_tree (vec_info *vinfo, *** 896,938 **** if (!matches[i]) { vect_free_oprnd_info (oprnds_info); ! return false; } ! stmt = SLP_TREE_SCALAR_STMTS (*node)[0]; /* Create SLP_TREE nodes for the definition node/s. */ FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info) { slp_tree child; ! unsigned old_nloads = loads->length (); ! unsigned old_max_nunits = *max_nunits; if (oprnd_info->first_dt != vect_internal_def) continue; if (++this_tree_size > max_tree_size) { vect_free_oprnd_info (oprnds_info); ! return false; ! } ! ! child = vect_create_new_slp_node (oprnd_info->def_stmts); ! if (!child) ! { ! vect_free_oprnd_info (oprnds_info); ! return false; } ! if (vect_build_slp_tree (vinfo, &child, ! group_size, max_nunits, loads, matches, ! npermutes, &this_tree_size, max_tree_size)) { /* If we have all children of child built up from scalars then just throw that away and build it up this node from scalars. */ if (!SLP_TREE_CHILDREN (child).is_empty ()) { - unsigned int j; slp_tree grandchild; FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild) --- 898,940 ---- if (!matches[i]) { vect_free_oprnd_info (oprnds_info); ! return NULL; } ! auto_vec<slp_tree, 4> children; ! auto_vec<slp_tree> this_loads; ! ! stmt = stmts[0]; /* Create SLP_TREE nodes for the definition node/s. */ FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info) { slp_tree child; ! unsigned old_nloads = this_loads.length (); ! unsigned old_tree_size = this_tree_size; ! unsigned int j; if (oprnd_info->first_dt != vect_internal_def) continue; if (++this_tree_size > max_tree_size) { + FOR_EACH_VEC_ELT (children, j, child) + vect_free_slp_tree (child); vect_free_oprnd_info (oprnds_info); ! return NULL; } ! if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts, ! group_size, &this_max_nunits, ! &this_loads, matches, npermutes, ! &this_tree_size, ! max_tree_size)) != NULL) { /* If we have all children of child built up from scalars then just throw that away and build it up this node from scalars. */ if (!SLP_TREE_CHILDREN (child).is_empty ()) { slp_tree grandchild; FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild) *************** vect_build_slp_tree (vec_info *vinfo, *** 941,948 **** if (!grandchild) { /* Roll back. */ ! *max_nunits = old_max_nunits; ! loads->truncate (old_nloads); FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild) vect_free_slp_tree (grandchild); SLP_TREE_CHILDREN (child).truncate (0); --- 943,950 ---- if (!grandchild) { /* Roll back. */ ! this_loads.truncate (old_nloads); ! this_tree_size = old_tree_size; FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild) vect_free_slp_tree (grandchild); SLP_TREE_CHILDREN (child).truncate (0); *************** vect_build_slp_tree (vec_info *vinfo, *** 952,964 **** "scalars instead\n"); oprnd_info->def_stmts = vNULL; SLP_TREE_DEF_TYPE (child) = vect_external_def; ! SLP_TREE_CHILDREN (*node).quick_push (child); continue; } } oprnd_info->def_stmts = vNULL; ! SLP_TREE_CHILDREN (*node).quick_push (child); continue; } --- 954,966 ---- "scalars instead\n"); oprnd_info->def_stmts = vNULL; SLP_TREE_DEF_TYPE (child) = vect_external_def; ! children.safe_push (child); continue; } } oprnd_info->def_stmts = vNULL; ! children.safe_push (child); continue; } *************** vect_build_slp_tree (vec_info *vinfo, *** 977,997 **** scalar version. */ && !is_pattern_stmt_p (vinfo_for_stmt (stmt))) { - unsigned int j; - slp_tree grandchild; - - /* Roll back. */ - *max_nunits = old_max_nunits; - loads->truncate (old_nloads); - FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild) - vect_free_slp_tree (grandchild); - SLP_TREE_CHILDREN (child).truncate (0); - dump_printf_loc (MSG_NOTE, vect_location, "Building vector operands from scalars\n"); ! oprnd_info->def_stmts = vNULL; SLP_TREE_DEF_TYPE (child) = vect_external_def; ! SLP_TREE_CHILDREN (*node).quick_push (child); continue; } --- 979,990 ---- scalar version. */ && !is_pattern_stmt_p (vinfo_for_stmt (stmt))) { dump_printf_loc (MSG_NOTE, vect_location, "Building vector operands from scalars\n"); ! child = vect_create_new_slp_node (oprnd_info->def_stmts); SLP_TREE_DEF_TYPE (child) = vect_external_def; ! children.safe_push (child); ! oprnd_info->def_stmts = vNULL; continue; } *************** vect_build_slp_tree (vec_info *vinfo, *** 1007,1029 **** && oprnds_info[1]->first_dt == vect_internal_def && is_gimple_assign (stmt) && commutative_tree_code (gimple_assign_rhs_code (stmt)) ! && !SLP_TREE_TWO_OPERATORS (*node) /* Do so only if the number of not successful permutes was nor more than a cut-ff as re-trying the recursive match on possibly each level of the tree would expose exponential behavior. */ && *npermutes < 4) { - unsigned int j; - slp_tree grandchild; - - /* Roll back. */ - *max_nunits = old_max_nunits; - loads->truncate (old_nloads); - FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild) - vect_free_slp_tree (grandchild); - SLP_TREE_CHILDREN (child).truncate (0); - /* Swap mismatched definition stmts. */ dump_printf_loc (MSG_NOTE, vect_location, "Re-trying with swapped operands of stmts "); --- 1000,1012 ---- && oprnds_info[1]->first_dt == vect_internal_def && is_gimple_assign (stmt) && commutative_tree_code (gimple_assign_rhs_code (stmt)) ! && ! two_operators /* Do so only if the number of not successful permutes was nor more than a cut-ff as re-trying the recursive match on possibly each level of the tree would expose exponential behavior. */ && *npermutes < 4) { /* Swap mismatched definition stmts. */ dump_printf_loc (MSG_NOTE, vect_location, "Re-trying with swapped operands of stmts "); *************** vect_build_slp_tree (vec_info *vinfo, *** 1037,1046 **** dump_printf (MSG_NOTE, "\n"); /* And try again with scratch 'matches' ... */ bool *tem = XALLOCAVEC (bool, group_size); ! if (vect_build_slp_tree (vinfo, &child, ! group_size, max_nunits, loads, ! tem, npermutes, &this_tree_size, ! max_tree_size)) { /* ... so if successful we can apply the operand swapping to the GIMPLE IL. This is necessary because for example --- 1020,1030 ---- dump_printf (MSG_NOTE, "\n"); /* And try again with scratch 'matches' ... */ bool *tem = XALLOCAVEC (bool, group_size); ! if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts, ! group_size, &this_max_nunits, ! &this_loads, tem, npermutes, ! &this_tree_size, ! max_tree_size)) != NULL) { /* ... so if successful we can apply the operand swapping to the GIMPLE IL. This is necessary because for example *************** vect_build_slp_tree (vec_info *vinfo, *** 1050,1061 **** we'll continue to process swapped operand two. */ for (j = 0; j < group_size; ++j) { ! gimple *stmt = SLP_TREE_SCALAR_STMTS (*node)[j]; gimple_set_plf (stmt, GF_PLF_1, false); } for (j = 0; j < group_size; ++j) { ! gimple *stmt = SLP_TREE_SCALAR_STMTS (*node)[j]; if (!matches[j]) { /* Avoid swapping operands twice. */ --- 1034,1045 ---- we'll continue to process swapped operand two. */ for (j = 0; j < group_size; ++j) { ! gimple *stmt = stmts[j]; gimple_set_plf (stmt, GF_PLF_1, false); } for (j = 0; j < group_size; ++j) { ! gimple *stmt = stmts[j]; if (!matches[j]) { /* Avoid swapping operands twice. */ *************** vect_build_slp_tree (vec_info *vinfo, *** 1070,1076 **** if (flag_checking) for (j = 0; j < group_size; ++j) { ! gimple *stmt = SLP_TREE_SCALAR_STMTS (*node)[j]; gcc_assert (gimple_plf (stmt, GF_PLF_1) == ! matches[j]); } --- 1054,1060 ---- if (flag_checking) for (j = 0; j < group_size; ++j) { ! gimple *stmt = stmts[j]; gcc_assert (gimple_plf (stmt, GF_PLF_1) == ! matches[j]); } *************** vect_build_slp_tree (vec_info *vinfo, *** 1087,1094 **** if (!grandchild) { /* Roll back. */ ! *max_nunits = old_max_nunits; ! loads->truncate (old_nloads); FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild) vect_free_slp_tree (grandchild); SLP_TREE_CHILDREN (child).truncate (0); --- 1071,1078 ---- if (!grandchild) { /* Roll back. */ ! this_loads.truncate (old_nloads); ! this_tree_size = old_tree_size; FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild) vect_free_slp_tree (grandchild); SLP_TREE_CHILDREN (child).truncate (0); *************** vect_build_slp_tree (vec_info *vinfo, *** 1098,1127 **** "scalars instead\n"); oprnd_info->def_stmts = vNULL; SLP_TREE_DEF_TYPE (child) = vect_external_def; ! SLP_TREE_CHILDREN (*node).quick_push (child); continue; } } oprnd_info->def_stmts = vNULL; ! SLP_TREE_CHILDREN (*node).quick_push (child); continue; } ++*npermutes; } ! oprnd_info->def_stmts = vNULL; ! vect_free_slp_tree (child); vect_free_oprnd_info (oprnds_info); ! return false; } if (tree_size) *tree_size += this_tree_size; ! vect_free_oprnd_info (oprnds_info); ! return true; } /* Dump a slp tree NODE using flags specified in DUMP_KIND. */ --- 1082,1118 ---- "scalars instead\n"); oprnd_info->def_stmts = vNULL; SLP_TREE_DEF_TYPE (child) = vect_external_def; ! children.safe_push (child); continue; } } oprnd_info->def_stmts = vNULL; ! children.safe_push (child); continue; } ++*npermutes; } ! gcc_assert (child == NULL); ! FOR_EACH_VEC_ELT (children, j, child) ! vect_free_slp_tree (child); vect_free_oprnd_info (oprnds_info); ! return NULL; } + vect_free_oprnd_info (oprnds_info); + if (tree_size) *tree_size += this_tree_size; + *max_nunits = this_max_nunits; + loads->safe_splice (this_loads); ! node = vect_create_new_slp_node (stmts); ! SLP_TREE_TWO_OPERATORS (node) = two_operators; ! SLP_TREE_CHILDREN (node).splice (children); ! return node; } /* Dump a slp tree NODE using flags specified in DUMP_KIND. */ *************** vect_analyze_slp_instance (vec_info *vin *** 1733,1748 **** scalar_stmts.safe_push (next); } - node = vect_create_new_slp_node (scalar_stmts); - loads.create (group_size); /* Build the tree for the SLP instance. */ bool *matches = XALLOCAVEC (bool, group_size); unsigned npermutes = 0; ! if (vect_build_slp_tree (vinfo, &node, group_size, ! &max_nunits, &loads, ! matches, &npermutes, NULL, max_tree_size)) { /* Calculate the unrolling factor based on the smallest type. */ if (max_nunits > nunits) --- 1724,1737 ---- scalar_stmts.safe_push (next); } loads.create (group_size); /* Build the tree for the SLP instance. */ bool *matches = XALLOCAVEC (bool, group_size); unsigned npermutes = 0; ! if ((node = vect_build_slp_tree (vinfo, scalar_stmts, group_size, ! &max_nunits, &loads, matches, &npermutes, ! NULL, max_tree_size)) != NULL) { /* Calculate the unrolling factor based on the smallest type. */ if (max_nunits > nunits) *************** vect_analyze_slp_instance (vec_info *vin *** 1864,1870 **** /* Failed to SLP. */ /* Free the allocated memory. */ ! vect_free_slp_tree (node); loads.release (); /* For basic block SLP, try to break the group up into multiples of the --- 1853,1859 ---- /* Failed to SLP. */ /* Free the allocated memory. */ ! scalar_stmts.release (); loads.release (); /* For basic block SLP, try to break the group up into multiples of the