The following makes sure to keep LOOP_VINFO_VECT_FACTOR at the indeterminate value zero until the vectorization factor is final. To enforce this, it makes LOOP_VINFO_VECT_FACTOR an rvalue and changes some direct references to the underlying field to use the macro.
* tree-vectorizer.h (LOOP_VINFO_VECT_FACTOR): Make an rvalue. * tree-vect-loop.cc (vect_determine_vectorization_factor): Do not set LOOP_VINFO_VECT_FACTOR, return value via reference. (vect_update_vf_for_slp): Likewise. (vect_analyze_loop_2): Set LOOP_VINFO_VECT_FACTOR only ever to its final value. Perform SLP optimization after setting the vectorization factor. * tree-vect-slp.cc (vect_slp_analyze_node_operations_1): Use LOOP_VINFO_VECT_FACTOR. (vect_slp_analyze_node_operations): Likewise. * tree-vectorizer.cc (vect_transform_loops): Likewise. --- gcc/tree-vect-loop.cc | 43 +++++++++++++++++++++++------------------- gcc/tree-vect-slp.cc | 4 ++-- gcc/tree-vectorizer.cc | 2 +- gcc/tree-vectorizer.h | 2 +- 4 files changed, 28 insertions(+), 23 deletions(-) diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc index 3a0731f3bea..3af4160426b 100644 --- a/gcc/tree-vect-loop.cc +++ b/gcc/tree-vect-loop.cc @@ -282,12 +282,12 @@ vect_determine_vf_for_stmt (vec_info *vinfo, */ static opt_result -vect_determine_vectorization_factor (loop_vec_info loop_vinfo) +vect_determine_vectorization_factor (loop_vec_info loop_vinfo, + poly_uint64 &vectorization_factor) { class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo); unsigned nbbs = loop->num_nodes; - poly_uint64 vectorization_factor = 1; tree scalar_type = NULL_TREE; gphi *phi; tree vectype; @@ -296,6 +296,8 @@ vect_determine_vectorization_factor (loop_vec_info loop_vinfo) DUMP_VECT_SCOPE ("vect_determine_vectorization_factor"); + vectorization_factor = 1; + for (i = 0; i < nbbs; i++) { basic_block bb = bbs[i]; @@ -370,7 +372,6 @@ vect_determine_vectorization_factor (loop_vec_info loop_vinfo) if (known_le (vectorization_factor, 1U)) return opt_result::failure_at (vect_location, "not vectorized: unsupported data-type\n"); - LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor; return opt_result::success (); } @@ -1937,17 +1938,16 @@ vect_create_loop_vinfo (class loop *loop, 
vec_info_shared *shared, statements update the vectorization factor. */ static void -vect_update_vf_for_slp (loop_vec_info loop_vinfo) +vect_update_vf_for_slp (loop_vec_info loop_vinfo, + poly_uint64 &vectorization_factor) { class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo); int nbbs = loop->num_nodes; - poly_uint64 vectorization_factor; int i; DUMP_VECT_SCOPE ("vect_update_vf_for_slp"); - vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo); gcc_assert (known_ne (vectorization_factor, 0U)); /* If all the stmts in the loop can be SLPed, we perform only SLP, and @@ -2006,7 +2006,6 @@ vect_update_vf_for_slp (loop_vec_info loop_vinfo) LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo)); } - LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor; if (dump_enabled_p ()) { dump_printf_loc (MSG_NOTE, vect_location, @@ -2809,7 +2808,8 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal, return opt_result::failure_at (vect_location, "bad data dependence.\n"); LOOP_VINFO_MAX_VECT_FACTOR (loop_vinfo) = max_vf; - ok = vect_determine_vectorization_factor (loop_vinfo); + poly_uint64 vectorization_factor; + ok = vect_determine_vectorization_factor (loop_vinfo, vectorization_factor); if (!ok) { if (dump_enabled_p ()) @@ -2821,7 +2821,7 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal, /* Compute the scalar iteration cost. */ vect_compute_single_scalar_iteration_cost (loop_vinfo); - poly_uint64 saved_vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo); + poly_uint64 saved_vectorization_factor = vectorization_factor; if (slp) { @@ -2839,13 +2839,7 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal, vect_detect_hybrid_slp (loop_vinfo); /* Update the vectorization factor based on the SLP decision. */ - vect_update_vf_for_slp (loop_vinfo); - - /* Optimize the SLP graph with the vectorization factor fixed. 
*/ - vect_optimize_slp (loop_vinfo); - - /* Gather the loads reachable from the SLP graph entries. */ - vect_gather_slp_loads (loop_vinfo); + vect_update_vf_for_slp (loop_vinfo, vectorization_factor); } } @@ -2863,11 +2857,12 @@ start_over: during finish_cost the first time we ran the analyzis for this vector mode. */ if (applying_suggested_uf) - LOOP_VINFO_VECT_FACTOR (loop_vinfo) *= loop_vinfo->suggested_unroll_factor; + vectorization_factor *= loop_vinfo->suggested_unroll_factor; /* Now the vectorization factor is final. */ - poly_uint64 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo); gcc_assert (known_ne (vectorization_factor, 0U)); + gcc_assert (known_eq (LOOP_VINFO_VECT_FACTOR (loop_vinfo), 0U)); + loop_vinfo->vectorization_factor = vectorization_factor; if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) && dump_enabled_p ()) { @@ -2882,6 +2877,15 @@ start_over: && maybe_lt (max_vf, LOOP_VINFO_VECT_FACTOR (loop_vinfo))) return opt_result::failure_at (vect_location, "bad data dependence.\n"); + if (slp) + { + /* Optimize the SLP graph with the vectorization factor fixed. */ + vect_optimize_slp (loop_vinfo); + + /* Gather the loads reachable from the SLP graph entries. */ + vect_gather_slp_loads (loop_vinfo); + } + loop_vinfo->vector_costs = init_cost (loop_vinfo, false); /* Analyze the alignment of the data-refs in the loop. @@ -3303,7 +3307,8 @@ again: /* Roll back state appropriately. No SLP this time. */ slp = false; /* Restore vectorization factor as it were without SLP. */ - LOOP_VINFO_VECT_FACTOR (loop_vinfo) = saved_vectorization_factor; + vectorization_factor = saved_vectorization_factor; + loop_vinfo->vectorization_factor = 0; /* Free the SLP instances. 
*/ FOR_EACH_VEC_ELT (LOOP_VINFO_SLP_INSTANCES (loop_vinfo), j, instance) vect_free_slp_instance (instance); diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc index 6799b9375ae..efda358a7f6 100644 --- a/gcc/tree-vect-slp.cc +++ b/gcc/tree-vect-slp.cc @@ -6126,7 +6126,7 @@ vect_slp_analyze_node_operations_1 (vec_info *vinfo, slp_tree node, { poly_uint64 vf; if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo)) - vf = loop_vinfo->vectorization_factor; + vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); else vf = 1; unsigned int group_size = SLP_TREE_LANES (node); @@ -6399,7 +6399,7 @@ vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node, unsigned group_size = SLP_TREE_LANES (child); poly_uint64 vf = 1; if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo)) - vf = loop_vinfo->vectorization_factor; + vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); SLP_TREE_NUMBER_OF_VEC_STMTS (child) = vect_get_num_vectors (vf * group_size, vector_type); /* And cost them. */ diff --git a/gcc/tree-vectorizer.cc b/gcc/tree-vectorizer.cc index d97e2b54c25..08ff932fb53 100644 --- a/gcc/tree-vectorizer.cc +++ b/gcc/tree-vectorizer.cc @@ -1015,7 +1015,7 @@ vect_transform_loops (hash_table<simduid_to_vf> *&simduid_to_vf_htab, if (!simduid_to_vf_htab) simduid_to_vf_htab = new hash_table<simduid_to_vf> (15); simduid_to_vf_data->simduid = DECL_UID (loop->simduid); - simduid_to_vf_data->vf = loop_vinfo->vectorization_factor; + simduid_to_vf_data->vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); *simduid_to_vf_htab->find_slot (simduid_to_vf_data, INSERT) = simduid_to_vf_data; } diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index 1810833a324..a2bab8676af 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -967,7 +967,7 @@ public: #define LOOP_VINFO_EPIL_USING_PARTIAL_VECTORS_P(L) \ (L)->epil_using_partial_vectors_p #define LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS(L) (L)->partial_load_store_bias -#define LOOP_VINFO_VECT_FACTOR(L) (L)->vectorization_factor 
+#define LOOP_VINFO_VECT_FACTOR(L) ((L)->vectorization_factor + 0) #define LOOP_VINFO_MAX_VECT_FACTOR(L) (L)->max_vectorization_factor #define LOOP_VINFO_MASKS(L) (L)->masks #define LOOP_VINFO_LENS(L) (L)->lens -- 2.35.3