This is part of a work-in-progress (WIP) series reworking the vectorizer. It is to be committed when stage1 opens.
This removes the SLP_INSTANCE_GROUP_SIZE member since the number of lanes throughout a SLP subgraph is not necessarily constant. * tree-vectorizer.h (SLP_INSTANCE_GROUP_SIZE): Remove. (_slp_instance::group_size): Likewise. * ... --- gcc/tree-vect-data-refs.c | 12 ++++++------ gcc/tree-vect-loop.c | 2 +- gcc/tree-vect-slp.c | 46 ++++++++++++++++++++++++++-------------------- gcc/tree-vect-stmts.c | 26 +++++++++++++------------- gcc/tree-vectorizer.h | 10 +++++----- 5 files changed, 51 insertions(+), 45 deletions(-) diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c index 0192aa64636..db92c818287 100644 --- a/gcc/tree-vect-data-refs.c +++ b/gcc/tree-vect-data-refs.c @@ -695,7 +695,7 @@ vect_slp_analyze_data_ref_dependence (vec_info *vinfo, disambiguating the loads. */ static bool -vect_slp_analyze_node_dependences (slp_instance instance, slp_tree node, +vect_slp_analyze_node_dependences (slp_tree node, vec<stmt_vec_info> stores, stmt_vec_info last_store_info) { @@ -704,7 +704,7 @@ vect_slp_analyze_node_dependences (slp_instance instance, slp_tree node, group. */ stmt_vec_info last_access_info = vect_find_last_scalar_stmt_in_slp (node); vec_info *vinfo = last_access_info->vinfo; - for (unsigned k = 0; k < SLP_INSTANCE_GROUP_SIZE (instance); ++k) + for (unsigned k = 0; k < SLP_TREE_SCALAR_STMTS (node).length (); ++k) { stmt_vec_info access_info = SLP_TREE_SCALAR_STMTS (node)[k]; if (access_info == last_access_info) @@ -794,12 +794,12 @@ vect_slp_analyze_instance_dependence (slp_instance instance) stmt_vec_info last_store_info = NULL; if (store) { - if (! vect_slp_analyze_node_dependences (instance, store, vNULL, NULL)) + if (! vect_slp_analyze_node_dependences (store, vNULL, NULL)) return false; /* Mark stores in this instance and remember the last one. 
*/ last_store_info = vect_find_last_scalar_stmt_in_slp (store); - for (unsigned k = 0; k < SLP_INSTANCE_GROUP_SIZE (instance); ++k) + for (unsigned k = 0; k < SLP_TREE_SCALAR_STMTS (store).length (); ++k) gimple_set_visited (SLP_TREE_SCALAR_STMTS (store)[k]->stmt, true); } @@ -810,7 +810,7 @@ vect_slp_analyze_instance_dependence (slp_instance instance) slp_tree load; unsigned int i; FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, load) - if (! vect_slp_analyze_node_dependences (instance, load, + if (! vect_slp_analyze_node_dependences (load, store ? SLP_TREE_SCALAR_STMTS (store) : vNULL, last_store_info)) @@ -821,7 +821,7 @@ vect_slp_analyze_instance_dependence (slp_instance instance) /* Unset the visited flag. */ if (store) - for (unsigned k = 0; k < SLP_INSTANCE_GROUP_SIZE (instance); ++k) + for (unsigned k = 0; k < SLP_TREE_SCALAR_STMTS (store).length (); ++k) gimple_set_visited (SLP_TREE_SCALAR_STMTS (store)[k]->stmt, false); return res; diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index 53fccb715ef..e7c3daefb92 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -6527,7 +6527,7 @@ vectorizable_reduction (stmt_vec_info stmt_info, slp_tree slp_node, which each SLP statement has its own initial value and in which that value needs to be repeated for every instance of the statement within the initial vector. 
*/ - unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance); + unsigned int group_size = SLP_TREE_SCALAR_STMTS (slp_node).length (); if (!neutral_op && !can_duplicate_and_interleave_p (loop_vinfo, group_size, TREE_TYPE (vectype_out))) diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c index 9d17e3386fa..3308e1791f1 100644 --- a/gcc/tree-vect-slp.c +++ b/gcc/tree-vect-slp.c @@ -1846,7 +1846,6 @@ vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size, static bool vect_attempt_slp_rearrange_stmts (slp_instance slp_instn) { - unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn); unsigned int i, j; unsigned int lidx; slp_tree node, load; @@ -1854,14 +1853,16 @@ vect_attempt_slp_rearrange_stmts (slp_instance slp_instn) /* Compare all the permutation sequences to the first one. We know that at least one load is permuted. */ node = SLP_INSTANCE_LOADS (slp_instn)[0]; - if (!node->load_permutation.exists ()) + if (!SLP_TREE_LOAD_PERMUTATION (node).exists ()) return false; + unsigned int group_size = SLP_TREE_LOAD_PERMUTATION (node).length (); for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i) { - if (!load->load_permutation.exists ()) + if (!SLP_TREE_LOAD_PERMUTATION (load).exists () + || SLP_TREE_LOAD_PERMUTATION (load).length () != group_size) return false; - FOR_EACH_VEC_ELT (load->load_permutation, j, lidx) - if (lidx != node->load_permutation[j]) + FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (load), j, lidx) + if (lidx != SLP_TREE_LOAD_PERMUTATION (node)[j]) return false; } @@ -1962,7 +1963,6 @@ vect_gather_slp_loads (slp_instance inst, slp_tree node) static bool vect_supported_load_permutation_p (slp_instance slp_instn) { - unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn); unsigned int i, j, k, next; slp_tree node; @@ -1974,7 +1974,7 @@ vect_supported_load_permutation_p (slp_instance slp_instn) FOR_EACH_VEC_ELT (node->load_permutation, j, next) dump_printf (MSG_NOTE, "%d ", next); else - for (k = 0; k 
< group_size; ++k) + for (k = 0; k < SLP_TREE_SCALAR_STMTS (node).length (); ++k) dump_printf (MSG_NOTE, "%d ", k); dump_printf (MSG_NOTE, "\n"); } @@ -1988,6 +1988,8 @@ vect_supported_load_permutation_p (slp_instance slp_instn) /* Check that all the load nodes are of the same size. */ /* ??? Can't we assert this? */ + unsigned int group_size + = SLP_TREE_SCALAR_STMTS (SLP_INSTANCE_TREE (slp_instn)).length (); FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node) if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size) return false; @@ -2054,7 +2056,7 @@ vect_supported_load_permutation_p (slp_instance slp_instn) vec<tree> tem; unsigned n_perms; if (!vect_transform_slp_perm_load (node, tem, NULL, - 1, slp_instn, true, &n_perms)) + 1, true, &n_perms)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, @@ -2080,7 +2082,7 @@ vect_supported_load_permutation_p (slp_instance slp_instn) FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node) if (node->load_permutation.exists () && !vect_transform_slp_perm_load (node, vNULL, NULL, test_vf, - slp_instn, true, &n_perms)) + true, &n_perms)) return false; return true; @@ -2313,7 +2315,6 @@ vect_analyze_slp_instance (vec_info *vinfo, /* Create a new SLP instance. */ new_instance = XNEW (class _slp_instance); SLP_INSTANCE_TREE (new_instance) = node; - SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size; SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor; SLP_INSTANCE_LOADS (new_instance) = vNULL; SLP_INSTANCE_ROOT_STMT (new_instance) = constructor ? stmt_info : NULL; @@ -2430,6 +2431,12 @@ vect_analyze_slp_instance (vec_info *vinfo, vinfo->slp_instances.safe_push (new_instance); + /* ??? We've replaced the old SLP_INSTANCE_GROUP_SIZE with + the number of scalar stmts in the root in a few places. + Verify that assumption holds. 
*/ + gcc_assert (SLP_TREE_SCALAR_STMTS (SLP_INSTANCE_TREE (new_instance)) + .length () == group_size); + if (dump_enabled_p ()) { dump_printf_loc (MSG_NOTE, vect_location, @@ -2763,7 +2770,7 @@ vect_detect_hybrid_slp (loop_vec_info loop_vinfo) hash_map<slp_tree, unsigned> visited; bool any = false; FOR_EACH_VEC_ELT (slp_instances, i, instance) - if (j < SLP_INSTANCE_GROUP_SIZE (instance)) + if (j < SLP_TREE_SCALAR_STMTS (SLP_INSTANCE_TREE (instance)).length ()) { any = true; vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance), @@ -2849,7 +2856,7 @@ vect_slp_analyze_node_operations_1 (vec_info *vinfo, slp_tree node, vf = loop_vinfo->vectorization_factor; else vf = 1; - unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (node_instance); + unsigned int group_size = SLP_TREE_SCALAR_STMTS (node).length (); tree vectype = STMT_VINFO_VECTYPE (stmt_info); SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vect_get_num_vectors (vf * group_size, vectype); @@ -3135,7 +3142,8 @@ vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo) FOR_EACH_VEC_ELT (slp_instances, i, instance) { auto_vec<bool, 20> life; - life.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance)); + life.safe_grow_cleared + (SLP_TREE_SCALAR_STMTS (SLP_INSTANCE_TREE (instance)).length ()); vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo), SLP_INSTANCE_TREE (instance), &life, &scalar_costs, visited); @@ -3965,20 +3973,19 @@ vect_get_slp_defs (slp_tree slp_node, vec<vec<tree> > *vec_oprnds, unsigned n) /* Generate vector permute statements from a list of loads in DR_CHAIN. If ANALYZE_ONLY is TRUE, only check that it is possible to create valid - permute statements for the SLP node NODE of the SLP instance - SLP_NODE_INSTANCE. */ + permute statements for the SLP node NODE. 
*/ bool vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain, gimple_stmt_iterator *gsi, poly_uint64 vf, - slp_instance slp_node_instance, bool analyze_only, + bool analyze_only, unsigned *n_perms) { stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0]; vec_info *vinfo = stmt_info->vinfo; int vec_index = 0; tree vectype = STMT_VINFO_VECTYPE (stmt_info); - unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance); + unsigned int group_size = SLP_TREE_SCALAR_STMTS (node).length (); unsigned int mask_element; machine_mode mode; @@ -4213,7 +4220,7 @@ vect_schedule_slp_instance (slp_tree node, slp_instance instance) /* VECTYPE is the type of the destination. */ vectype = STMT_VINFO_VECTYPE (stmt_info); poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype); - group_size = SLP_INSTANCE_GROUP_SIZE (instance); + group_size = SLP_TREE_SCALAR_STMTS (node).length (); gcc_assert (SLP_TREE_NUMBER_OF_VEC_STMTS (node) != 0); SLP_TREE_VEC_STMTS (node).create (SLP_TREE_NUMBER_OF_VEC_STMTS (node)); @@ -4454,8 +4461,7 @@ vect_schedule_slp (vec_info *vinfo) if (is_a <loop_vec_info> (vinfo)) vect_remove_slp_scalar_calls (root); - for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store_info) - && j < SLP_INSTANCE_GROUP_SIZE (instance); j++) + for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store_info); j++) { if (!STMT_VINFO_DATA_REF (store_info)) break; diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c index 2ca8e494680..3d4c18efe92 100644 --- a/gcc/tree-vect-stmts.c +++ b/gcc/tree-vect-stmts.c @@ -1168,9 +1168,8 @@ vect_get_store_cost (stmt_vec_info stmt_info, int ncopies, access scheme chosen. 
*/ static void -vect_model_load_cost (stmt_vec_info stmt_info, unsigned ncopies, +vect_model_load_cost (stmt_vec_info stmt_info, unsigned ncopies, poly_uint64 vf, vect_memory_access_type memory_access_type, - slp_instance instance, slp_tree slp_node, stmt_vector_for_cost *cost_vec) { @@ -1192,10 +1191,7 @@ vect_model_load_cost (stmt_vec_info stmt_info, unsigned ncopies, unsigned n_perms; unsigned assumed_nunits = vect_nunits_for_cost (STMT_VINFO_VECTYPE (first_stmt_info)); - unsigned slp_vf = (ncopies * assumed_nunits) / instance->group_size; - vect_transform_slp_perm_load (slp_node, vNULL, NULL, - slp_vf, instance, true, - &n_perms); + vect_transform_slp_perm_load (slp_node, vNULL, NULL, vf, true, &n_perms); inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm, first_stmt_info, 0, vect_body); /* And adjust the number of loads performed. This handles @@ -8740,8 +8736,8 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, memory_access_type, &gs_info, mask); STMT_VINFO_TYPE (stmt_info) = load_vec_info_type; - vect_model_load_cost (stmt_info, ncopies, memory_access_type, - slp_node_instance, slp_node, cost_vec); + vect_model_load_cost (stmt_info, ncopies, vf, memory_access_type, + slp_node, cost_vec); return true; } @@ -9058,7 +9054,7 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, { unsigned n_perms; vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf, - slp_node_instance, false, &n_perms); + false, &n_perms); } return true; } @@ -9108,8 +9104,13 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, unpermuted sequence. In other cases we need to load the whole group, not only the number of vector stmts the permutation result fits in. */ + /* ??? There is no such thing as a common group size, implement + the stuff below in other ways. 
*/ + unsigned inst_group_size + = SLP_TREE_SCALAR_STMTS + (SLP_INSTANCE_TREE (slp_node_instance)).length (); if (slp_perm - && (group_size != SLP_INSTANCE_GROUP_SIZE (slp_node_instance) + && (group_size != inst_group_size || !multiple_p (nunits, group_size))) { /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for @@ -9123,7 +9124,7 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, { vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); group_gap_adj - = group_size - SLP_INSTANCE_GROUP_SIZE (slp_node_instance); + = group_size - inst_group_size; } } else @@ -9765,8 +9766,7 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, { unsigned n_perms; if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf, - slp_node_instance, false, - &n_perms)) + false, &n_perms)) { dr_chain.release (); return false; diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index f7becb34ab4..2acaba7d120 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -119,13 +119,16 @@ typedef struct _slp_tree *slp_tree; struct _slp_tree { /* Nodes that contain def-stmts of this node statements operands. */ vec<slp_tree> children; + /* A group of scalar stmts to be vectorized together. */ vec<stmt_vec_info> stmts; /* A group of scalar operands to be vectorized together. */ vec<tree> ops; + /* Load permutation relative to the stores, NULL if there is no permutation. */ vec<unsigned> load_permutation; + /* Vectorized stmt/s. */ vec<stmt_vec_info> vec_stmts; /* Number of vector stmts that are created to replace the group of scalar @@ -133,6 +136,7 @@ struct _slp_tree { scalar elements in one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector size. */ unsigned int vec_stmts_size; + /* Reference count in the SLP graph. */ unsigned int refcnt; /* The maximum number of vector elements for the subtree rooted @@ -156,9 +160,6 @@ public: from, NULL otherwise. 
*/ stmt_vec_info root_stmt; - /* Size of groups of scalar stmts that will be replaced by SIMD stmt/s. */ - unsigned int group_size; - /* The unrolling factor required to vectorized this SLP instance. */ poly_uint64 unrolling_factor; @@ -172,7 +173,6 @@ public: /* Access Functions. */ #define SLP_INSTANCE_TREE(S) (S)->root -#define SLP_INSTANCE_GROUP_SIZE(S) (S)->group_size #define SLP_INSTANCE_UNROLLING_FACTOR(S) (S)->unrolling_factor #define SLP_INSTANCE_LOADS(S) (S)->loads #define SLP_INSTANCE_ROOT_STMT(S) (S)->root_stmt @@ -1848,7 +1848,7 @@ extern tree cse_and_gimplify_to_preheader (loop_vec_info, tree); extern void vect_free_slp_instance (slp_instance, bool); extern bool vect_transform_slp_perm_load (slp_tree, vec<tree> , gimple_stmt_iterator *, poly_uint64, - slp_instance, bool, unsigned *); + bool, unsigned *); extern bool vect_slp_analyze_operations (vec_info *); extern void vect_schedule_slp (vec_info *); extern opt_result vect_analyze_slp (vec_info *, unsigned); -- 2.13.7