This patch disentangles data reference and data dependence analysis (for both loop and basic-block vectorization). Doing so avoids the need to fix data dependences for gather loads as well as computing them in the first place for data references we cannot vectorize. The latter speeds up basic-block vectorization as it reduces the quadratic cost of dependence computation to those dependences we will end up using (for polyhedron ac.f90 this cuts 10% of compile-time for me according to callgrind and get's SLP vectorization off the radar of -ftime-report).
The patch also touches data reference finding, avoiding excessive malloc/free there by using a stack vector. That's really an independent change, but ... Bootstrapped on x86_64-unknown-linux-gnu, testing in progress. Queued for 4.9. I could split out the data-ref changes and consider the BB vectorization speedup a regression fix - Jakub, any opinion on that? Thanks, Richard. 2013-02-26 Richard Biener <rguent...@suse.de> * tree-data-ref.h (find_data_references_in_loop): Declare. * tree-data-ref.c (get_references_in_stmt): Use a stack vector pre-allocated in the callers. (find_data_references_in_stmt): Adjust. (graphite_find_data_references_in_stmt): Likewise. (create_rdg_vertices): Likewise. (find_data_references_in_loop): Export. * tree-vect-data-refs.c (vect_analyze_data_ref_dependences): Compute dependences here... (vect_analyze_data_refs): ...not here. When we encounter a non-vectorizable data reference in basic-block vectorization truncate the data reference vector. Do not bother to fixup data-dependence information for gather loads. * tree-vect-slp.c (vect_slp_analyze_bb_1): Check the number of data references, as reported. Index: gcc/tree-data-ref.h =================================================================== *** gcc/tree-data-ref.h.orig 2013-02-20 13:22:51.000000000 +0100 --- gcc/tree-data-ref.h 2013-02-26 13:31:44.114507972 +0100 *************** extern bool find_data_references_in_stmt *** 382,387 **** --- 382,388 ---- vec<data_reference_p> *); extern bool graphite_find_data_references_in_stmt (loop_p, loop_p, gimple, vec<data_reference_p> *); + tree find_data_references_in_loop (struct loop *, vec<data_reference_p> *); struct data_reference *create_data_ref (loop_p, loop_p, tree, gimple, bool); extern bool find_loop_nest (struct loop *, vec<loop_p> *); extern struct data_dependence_relation *initialize_data_dependence_relation Index: gcc/tree-data-ref.c =================================================================== *** gcc/tree-data-ref.c.orig 2013-02-20 13:22:51.000000000 +0100 --- gcc/tree-data-ref.c 2013-02-26 14:32:13.571463465 +0100 *************** compute_all_dependences (vec<data_refere *** 4257,4267 **** typedef struct data_ref_loc_d { ! /* Position of the memory reference. */ ! tree *pos; ! /* True if the memory reference is read. */ ! bool is_read; } data_ref_loc; --- 4257,4267 ---- typedef struct data_ref_loc_d { ! /* Position of the memory reference. */ ! tree *pos; ! /* True if the memory reference is read. */ ! bool is_read; } data_ref_loc; *************** typedef struct data_ref_loc_d *** 4269,4283 **** true if STMT clobbers memory, false otherwise. */ static bool ! get_references_in_stmt (gimple stmt, vec<data_ref_loc> *references) { bool clobbers_memory = false; data_ref_loc ref; tree *op0, *op1; enum gimple_code stmt_code = gimple_code (stmt); - references->create (0); - /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects. As we cannot model data-references to not spelled out accesses give up if they may occur. */ --- 4269,4281 ---- true if STMT clobbers memory, false otherwise. */ static bool ! get_references_in_stmt (gimple stmt, vec<data_ref_loc, va_stack> *references) { bool clobbers_memory = false; data_ref_loc ref; tree *op0, *op1; enum gimple_code stmt_code = gimple_code (stmt); /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects. As we cannot model data-references to not spelled out accesses give up if they may occur. */ *************** find_data_references_in_stmt (struct loo *** 4348,4358 **** vec<data_reference_p> *datarefs) { unsigned i; ! vec<data_ref_loc> references; data_ref_loc *ref; bool ret = true; data_reference_p dr; if (get_references_in_stmt (stmt, &references)) { references.release (); --- 4346,4357 ---- vec<data_reference_p> *datarefs) { unsigned i; ! vec<data_ref_loc, va_stack> references; data_ref_loc *ref; bool ret = true; data_reference_p dr; + vec_stack_alloc (data_ref_loc, references, 2); if (get_references_in_stmt (stmt, &references)) { references.release (); *************** graphite_find_data_references_in_stmt (l *** 4381,4391 **** vec<data_reference_p> *datarefs) { unsigned i; ! vec<data_ref_loc> references; data_ref_loc *ref; bool ret = true; data_reference_p dr; if (get_references_in_stmt (stmt, &references)) { references.release (); --- 4380,4391 ---- vec<data_reference_p> *datarefs) { unsigned i; ! vec<data_ref_loc, va_stack> references; data_ref_loc *ref; bool ret = true; data_reference_p dr; + vec_stack_alloc (data_ref_loc, references, 2); if (get_references_in_stmt (stmt, &references)) { references.release (); *************** find_data_references_in_bb (struct loop *** 4437,4443 **** TODO: This function should be made smarter so that it can handle address arithmetic as if they were array accesses, etc. */ ! static tree find_data_references_in_loop (struct loop *loop, vec<data_reference_p> *datarefs) { --- 4437,4443 ---- TODO: This function should be made smarter so that it can handle address arithmetic as if they were array accesses, etc. */ ! tree find_data_references_in_loop (struct loop *loop, vec<data_reference_p> *datarefs) { *************** create_rdg_vertices (struct graph *rdg, *** 5005,5011 **** FOR_EACH_VEC_ELT (stmts, i, stmt) { ! vec<data_ref_loc> references; data_ref_loc *ref; struct vertex *v = &(rdg->vertices[i]); --- 5005,5011 ---- FOR_EACH_VEC_ELT (stmts, i, stmt) { ! vec<data_ref_loc, va_stack> references; data_ref_loc *ref; struct vertex *v = &(rdg->vertices[i]); *************** create_rdg_vertices (struct graph *rdg, *** 5020,5025 **** --- 5020,5026 ---- if (gimple_code (stmt) == GIMPLE_PHI) continue; + vec_stack_alloc (data_ref_loc, references, 2); get_references_in_stmt (stmt, &references); FOR_EACH_VEC_ELT (references, j, ref) { Index: gcc/tree-vect-data-refs.c =================================================================== *** gcc/tree-vect-data-refs.c.orig 2013-02-26 13:31:25.000000000 +0100 --- gcc/tree-vect-data-refs.c 2013-02-26 14:25:52.896275680 +0100 *************** vect_analyze_data_ref_dependences (loop_ *** 798,806 **** dump_printf_loc (MSG_NOTE, vect_location, "=== vect_analyze_dependences ==="); if (loop_vinfo) ! ddrs = LOOP_VINFO_DDRS (loop_vinfo); else ! ddrs = BB_VINFO_DDRS (bb_vinfo); FOR_EACH_VEC_ELT (ddrs, i, ddr) if (vect_analyze_data_ref_dependence (ddr, loop_vinfo, max_vf)) --- 798,818 ---- dump_printf_loc (MSG_NOTE, vect_location, "=== vect_analyze_dependences ==="); if (loop_vinfo) ! { ! if (!compute_all_dependences (LOOP_VINFO_DATAREFS (loop_vinfo), ! &LOOP_VINFO_DDRS (loop_vinfo), ! LOOP_VINFO_LOOP_NEST (loop_vinfo), true)) ! return false; ! ddrs = LOOP_VINFO_DDRS (loop_vinfo); ! } else ! { ! if (!compute_all_dependences (BB_VINFO_DATAREFS (bb_vinfo), ! &BB_VINFO_DDRS (bb_vinfo), ! vNULL, true)) ! return false; ! ddrs = BB_VINFO_DDRS (bb_vinfo); ! } FOR_EACH_VEC_ELT (ddrs, i, ddr) if (vect_analyze_data_ref_dependence (ddr, loop_vinfo, max_vf)) *************** vect_analyze_data_refs (loop_vec_info lo *** 2941,2947 **** vec<data_reference_p> datarefs; struct data_reference *dr; tree scalar_type; - bool res, stop_bb_analysis = false; if (dump_enabled_p ()) dump_printf_loc (MSG_NOTE, vect_location, --- 2953,2958 ---- *************** vect_analyze_data_refs (loop_vec_info lo *** 2950,2962 **** if (loop_vinfo) { loop = LOOP_VINFO_LOOP (loop_vinfo); ! res = compute_data_dependences_for_loop ! (loop, true, ! &LOOP_VINFO_LOOP_NEST (loop_vinfo), ! &LOOP_VINFO_DATAREFS (loop_vinfo), ! &LOOP_VINFO_DDRS (loop_vinfo)); ! ! if (!res) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, --- 2961,2969 ---- if (loop_vinfo) { loop = LOOP_VINFO_LOOP (loop_vinfo); ! if (!find_loop_nest (loop, &LOOP_VINFO_LOOP_NEST (loop_vinfo)) ! || find_data_references_in_loop ! (loop, &LOOP_VINFO_DATAREFS (loop_vinfo))) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, *************** vect_analyze_data_refs (loop_vec_info lo *** 2987,3003 **** break; } } - if (!compute_all_dependences (BB_VINFO_DATAREFS (bb_vinfo), - &BB_VINFO_DDRS (bb_vinfo), - vNULL, true)) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "not vectorized: basic block contains function" - " calls or data references that cannot be" - " analyzed"); - return false; - } datarefs = BB_VINFO_DATAREFS (bb_vinfo); } --- 2994,2999 ---- *************** vect_analyze_data_refs (loop_vec_info lo *** 3024,3035 **** stmt = DR_STMT (dr); stmt_info = vinfo_for_stmt (stmt); - if (stop_bb_analysis) - { - STMT_VINFO_VECTORIZABLE (stmt_info) = false; - continue; - } - /* Check that analysis of the data-ref succeeded. */ if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr) || !DR_STEP (dr)) --- 3020,3025 ---- *************** vect_analyze_data_refs (loop_vec_info lo *** 3070,3080 **** } if (bb_vinfo) ! { ! STMT_VINFO_VECTORIZABLE (stmt_info) = false; ! stop_bb_analysis = true; ! continue; ! } return false; } --- 3060,3066 ---- } if (bb_vinfo) ! break; return false; } *************** vect_analyze_data_refs (loop_vec_info lo *** 3088,3098 **** "constant"); if (bb_vinfo) ! { ! STMT_VINFO_VECTORIZABLE (stmt_info) = false; ! stop_bb_analysis = true; ! continue; ! } if (gather) free_data_ref (dr); --- 3074,3080 ---- "constant"); if (bb_vinfo) ! break; if (gather) free_data_ref (dr); *************** vect_analyze_data_refs (loop_vec_info lo *** 3109,3119 **** } if (bb_vinfo) ! { ! STMT_VINFO_VECTORIZABLE (stmt_info) = false; ! stop_bb_analysis = true; ! continue; ! } return false; } --- 3091,3097 ---- } if (bb_vinfo) ! break; return false; } *************** vect_analyze_data_refs (loop_vec_info lo *** 3129,3139 **** } if (bb_vinfo) ! { ! STMT_VINFO_VECTORIZABLE (stmt_info) = false; ! stop_bb_analysis = true; ! continue; ! } if (gather) free_data_ref (dr); --- 3107,3113 ---- } if (bb_vinfo) ! break; if (gather) free_data_ref (dr); *************** vect_analyze_data_refs (loop_vec_info lo *** 3152,3162 **** } if (bb_vinfo) ! { ! STMT_VINFO_VECTORIZABLE (stmt_info) = false; ! stop_bb_analysis = true; ! continue; ! } if (gather) free_data_ref (dr); --- 3126,3132 ---- } if (bb_vinfo) ! break; if (gather) free_data_ref (dr); *************** vect_analyze_data_refs (loop_vec_info lo *** 3177,3187 **** } if (bb_vinfo) ! { ! STMT_VINFO_VECTORIZABLE (stmt_info) = false; ! stop_bb_analysis = true; ! continue; ! } if (gather) free_data_ref (dr); --- 3147,3153 ---- } if (bb_vinfo) ! break; if (gather) free_data_ref (dr); *************** vect_analyze_data_refs (loop_vec_info lo *** 3316,3326 **** } if (bb_vinfo) ! { ! STMT_VINFO_VECTORIZABLE (stmt_info) = false; ! stop_bb_analysis = true; ! continue; ! } if (gather) free_data_ref (dr); --- 3282,3288 ---- } if (bb_vinfo) ! break; if (gather) free_data_ref (dr); *************** vect_analyze_data_refs (loop_vec_info lo *** 3346,3357 **** } if (bb_vinfo) ! { ! /* Mark the statement as not vectorizable. */ ! STMT_VINFO_VECTORIZABLE (stmt_info) = false; ! stop_bb_analysis = true; ! continue; ! } if (gather) { --- 3308,3314 ---- } if (bb_vinfo) ! break; if (gather) { *************** vect_analyze_data_refs (loop_vec_info lo *** 3369,3382 **** if (gather) { - unsigned int j, k, n; - struct data_reference *olddr - = datarefs[i]; - vec<ddr_p> ddrs = LOOP_VINFO_DDRS (loop_vinfo); - struct data_dependence_relation *ddr, *newddr; - bool bad = false; tree off; - vec<loop_p> nest = LOOP_VINFO_LOOP_NEST (loop_vinfo); gather = 0 != vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL); if (gather --- 3326,3332 ---- *************** vect_analyze_data_refs (loop_vec_info lo *** 3396,3454 **** return false; } - n = datarefs.length () - 1; - for (j = 0, k = i - 1; j < i; j++) - { - ddr = ddrs[k]; - gcc_assert (DDR_B (ddr) == olddr); - newddr = initialize_data_dependence_relation (DDR_A (ddr), dr, - nest); - ddrs[k] = newddr; - free_dependence_relation (ddr); - if (!bad - && DR_IS_WRITE (DDR_A (newddr)) - && DDR_ARE_DEPENDENT (newddr) != chrec_known) - bad = true; - k += --n; - } - - k++; - n = k + datarefs.length () - i - 1; - for (; k < n; k++) - { - ddr = ddrs[k]; - gcc_assert (DDR_A (ddr) == olddr); - newddr = initialize_data_dependence_relation (dr, DDR_B (ddr), - nest); - ddrs[k] = newddr; - free_dependence_relation (ddr); - if (!bad - && DR_IS_WRITE (DDR_B (newddr)) - && DDR_ARE_DEPENDENT (newddr) != chrec_known) - bad = true; - } - - k = ddrs.length () - - datarefs.length () + i; - ddr = ddrs[k]; - gcc_assert (DDR_A (ddr) == olddr && DDR_B (ddr) == olddr); - newddr = initialize_data_dependence_relation (dr, dr, nest); - ddrs[k] = newddr; - free_dependence_relation (ddr); datarefs[i] = dr; - - if (bad) - { - if (dump_enabled_p ()) - { - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "not vectorized: data dependence conflict" - " prevents gather load"); - dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0); - } - return false; - } - STMT_VINFO_GATHER_P (stmt_info) = true; } else if (loop_vinfo --- 3346,3352 ---- *************** vect_analyze_data_refs (loop_vec_info lo *** 3472,3477 **** --- 3370,3391 ---- } } + /* If we stopped analysis at the first dataref we could not analyze + when trying to vectorize a basic-block mark the rest of the datarefs + as not vectorizable and truncate the vector of datarefs. That + avoids spending useless time in analyzing their dependence. */ + if (i != datarefs.length ()) + { + gcc_assert (bb_vinfo != NULL); + for (unsigned j = i; j < datarefs.length (); ++j) + { + data_reference_p dr = datarefs[j]; + STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false; + free_data_ref (dr); + } + datarefs.truncate (i); + } + return true; } Index: gcc/tree-vect-slp.c =================================================================== *** gcc/tree-vect-slp.c.orig 2013-02-26 13:31:25.000000000 +0100 --- gcc/tree-vect-slp.c 2013-02-26 14:01:07.587931536 +0100 *************** static bb_vec_info *** 2085,2091 **** vect_slp_analyze_bb_1 (basic_block bb) { bb_vec_info bb_vinfo; - vec<ddr_p> ddrs; vec<slp_instance> slp_instances; slp_instance instance; int i; --- 2085,2090 ---- *************** vect_slp_analyze_bb_1 (basic_block bb) *** 2107,2114 **** return NULL; } ! ddrs = BB_VINFO_DDRS (bb_vinfo); ! if (!ddrs.length ()) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, --- 2106,2112 ---- return NULL; } ! if (BB_VINFO_DATAREFS (bb_vinfo).length () < 2) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,