This applies some TLC to the vectorizer's various niter and related computations.
Bootstrapped and tested on x86_64-unknown-linux-gnu, applied. Richard. 2013-11-21 Richard Biener <rguent...@suse.de> * tree-vect-loop-manip.c (vect_build_loop_niters, vect_generate_tmps_on_preheader): Move ... * tree-vect-loop.c (vect_build_loop_niters, vect_generate_tmps_on_preheader): ... here and simplify. (vect_transform_loop): Call them here and pass down results to consumers. * tree-vect-loop-manip.c (vect_do_peeling_for_loop_bound): Get niter variables from caller. (vect_do_peeling_for_alignment): Likewise. * tree-vectorizer.h (vect_generate_tmps_on_preheader): Remove. (vect_do_peeling_for_loop_bound, vect_do_peeling_for_alignment): Adjust prototypes. Index: gcc/tree-vect-loop-manip.c =================================================================== *** gcc/tree-vect-loop-manip.c (revision 205118) --- gcc/tree-vect-loop-manip.c (working copy) *************** find_loop_location (struct loop *loop) *** 1400,1550 **** } - /* This function builds ni_name = number of iterations loop executes - on the loop preheader. If SEQ is given the stmt is instead emitted - there. */ - - static tree - vect_build_loop_niters (loop_vec_info loop_vinfo, gimple_seq seq) - { - tree ni_name, var; - gimple_seq stmts = NULL; - edge pe; - struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); - tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo)); - - var = create_tmp_var (TREE_TYPE (ni), "niters"); - ni_name = force_gimple_operand (ni, &stmts, false, var); - - pe = loop_preheader_edge (loop); - if (stmts) - { - if (seq) - gimple_seq_add_seq (&seq, stmts); - else - { - basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts); - gcc_assert (!new_bb); - } - } - - return ni_name; - } - - - /* This function generates the following statements: - - ni_name = number of iterations loop executes - ratio = ni_name / vf - ratio_mult_vf_name = ratio * vf - - and places them at the loop preheader edge or in COND_EXPR_STMT_LIST - if that is non-NULL. 
*/ - - void - vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo, - tree *ni_name_ptr, - tree *ratio_mult_vf_name_ptr, - tree *ratio_name_ptr, - gimple_seq cond_expr_stmt_list) - { - - edge pe; - basic_block new_bb; - gimple_seq stmts; - tree ni_name, ni_minus_gap_name; - tree var; - tree ratio_name; - tree ratio_mult_vf_name; - struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); - tree ni = LOOP_VINFO_NITERS (loop_vinfo); - int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); - tree log_vf; - - pe = loop_preheader_edge (loop); - - /* Generate temporary variable that contains - number of iterations loop executes. */ - - ni_name = vect_build_loop_niters (loop_vinfo, cond_expr_stmt_list); - log_vf = build_int_cst (TREE_TYPE (ni), exact_log2 (vf)); - - /* If epilogue loop is required because of data accesses with gaps, we - subtract one iteration from the total number of iterations here for - correct calculation of RATIO. */ - if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)) - { - ni_minus_gap_name = fold_build2 (MINUS_EXPR, TREE_TYPE (ni_name), - ni_name, - build_one_cst (TREE_TYPE (ni_name))); - if (!is_gimple_val (ni_minus_gap_name)) - { - var = create_tmp_var (TREE_TYPE (ni), "ni_gap"); - - stmts = NULL; - ni_minus_gap_name = force_gimple_operand (ni_minus_gap_name, &stmts, - true, var); - if (cond_expr_stmt_list) - gimple_seq_add_seq (&cond_expr_stmt_list, stmts); - else - { - pe = loop_preheader_edge (loop); - new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts); - gcc_assert (!new_bb); - } - } - } - else - ni_minus_gap_name = ni_name; - - /* Create: ratio = ni >> log2(vf) */ - - ratio_name = fold_build2 (RSHIFT_EXPR, TREE_TYPE (ni_minus_gap_name), - ni_minus_gap_name, log_vf); - if (!is_gimple_val (ratio_name)) - { - var = create_tmp_var (TREE_TYPE (ni), "bnd"); - - stmts = NULL; - ratio_name = force_gimple_operand (ratio_name, &stmts, true, var); - if (cond_expr_stmt_list) - gimple_seq_add_seq (&cond_expr_stmt_list, stmts); - else - { - pe = loop_preheader_edge 
(loop); - new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts); - gcc_assert (!new_bb); - } - } - - /* Create: ratio_mult_vf = ratio << log2 (vf). */ - - ratio_mult_vf_name = fold_build2 (LSHIFT_EXPR, TREE_TYPE (ratio_name), - ratio_name, log_vf); - if (!is_gimple_val (ratio_mult_vf_name)) - { - var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf"); - - stmts = NULL; - ratio_mult_vf_name = force_gimple_operand (ratio_mult_vf_name, &stmts, - true, var); - if (cond_expr_stmt_list) - gimple_seq_add_seq (&cond_expr_stmt_list, stmts); - else - { - pe = loop_preheader_edge (loop); - new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts); - gcc_assert (!new_bb); - } - } - - *ni_name_ptr = ni_name; - *ratio_mult_vf_name_ptr = ratio_mult_vf_name; - *ratio_name_ptr = ratio_name; - - return; - } - /* Function vect_can_advance_ivs_p In case the number of iterations that LOOP iterates is unknown at compile --- 1400,1405 ---- *************** vect_update_ivs_after_vectorizer (loop_v *** 1762,1771 **** test. */ void ! vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio, unsigned int th, bool check_profitability) { - tree ni_name, ratio_mult_vf_name; struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); struct loop *new_loop; edge update_e; --- 1617,1626 ---- test. */ void ! vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, ! 
tree ni_name, tree ratio_mult_vf_name, unsigned int th, bool check_profitability) { struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); struct loop *new_loop; edge update_e; *************** vect_do_peeling_for_loop_bound (loop_vec *** 1781,1795 **** initialize_original_copy_tables (); - /* Generate the following variables on the preheader of original loop: - - ni_name = number of iteration the original loop executes - ratio = ni_name / vf - ratio_mult_vf_name = ratio * vf */ - vect_generate_tmps_on_preheader (loop_vinfo, &ni_name, - &ratio_mult_vf_name, ratio, - cond_expr_stmt_list); - loop_num = loop->num; new_loop = slpeel_tree_peel_loop_to_edge (loop, single_exit (loop), --- 1636,1641 ---- *************** vect_update_inits_of_drs (loop_vec_info *** 2025,2035 **** peeling is recorded in LOOP_VINFO_UNALIGNED_DR. */ void ! vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, unsigned int th, bool check_profitability) { struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); ! tree niters_of_prolog_loop, ni_name; tree n_iters; tree wide_prolog_niters; struct loop *new_loop; --- 1871,1881 ---- peeling is recorded in LOOP_VINFO_UNALIGNED_DR. */ void ! vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, tree ni_name, unsigned int th, bool check_profitability) { struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); ! tree niters_of_prolog_loop; tree n_iters; tree wide_prolog_niters; struct loop *new_loop; *************** vect_do_peeling_for_alignment (loop_vec_ *** 2043,2049 **** initialize_original_copy_tables (); ! ni_name = vect_build_loop_niters (loop_vinfo, NULL); niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name, &bound); --- 1889,1896 ---- initialize_original_copy_tables (); ! gimple_seq stmts = NULL; ! 
gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts); niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name, &bound); Index: gcc/tree-vect-loop.c =================================================================== *** gcc/tree-vect-loop.c (revision 205118) --- gcc/tree-vect-loop.c (working copy) *************** vect_loop_kill_debug_uses (struct loop * *** 5572,5577 **** --- 5572,5681 ---- } } + + /* This function builds ni_name = number of iterations. Statements + are queued onto SEQ. */ + + static tree + vect_build_loop_niters (loop_vec_info loop_vinfo, gimple_seq *seq) + { + tree ni_name, var; + gimple_seq stmts = NULL; + tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo)); + + var = create_tmp_var (TREE_TYPE (ni), "niters"); + ni_name = force_gimple_operand (ni, &stmts, false, var); + + if (stmts) + gimple_seq_add_seq (seq, stmts); + + return ni_name; + } + + + /* This function generates the following statements: + + ni_name = number of iterations loop executes + ratio = ni_name / vf + ratio_mult_vf_name = ratio * vf + + and places them in COND_EXPR_STMT_LIST. */ + + static void + vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo, + tree ni_name, + tree *ratio_mult_vf_name_ptr, + tree *ratio_name_ptr, + gimple_seq *cond_expr_stmt_list) + { + gimple_seq stmts; + tree ni_minus_gap_name; + tree var; + tree ratio_name; + tree ratio_mult_vf_name; + tree ni = LOOP_VINFO_NITERS (loop_vinfo); + int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); + tree log_vf; + + log_vf = build_int_cst (TREE_TYPE (ni), exact_log2 (vf)); + + /* If epilogue loop is required because of data accesses with gaps, we + subtract one iteration from the total number of iterations here for + correct calculation of RATIO. 
*/ + if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)) + { + ni_minus_gap_name = fold_build2 (MINUS_EXPR, TREE_TYPE (ni_name), + ni_name, + build_one_cst (TREE_TYPE (ni_name))); + if (!is_gimple_val (ni_minus_gap_name)) + { + var = create_tmp_var (TREE_TYPE (ni), "ni_gap"); + + stmts = NULL; + ni_minus_gap_name = force_gimple_operand (ni_minus_gap_name, &stmts, + true, var); + gimple_seq_add_seq (cond_expr_stmt_list, stmts); + } + } + else + ni_minus_gap_name = ni_name; + + /* Create: ratio = ni >> log2(vf) */ + + ratio_name = fold_build2 (RSHIFT_EXPR, TREE_TYPE (ni_minus_gap_name), + ni_minus_gap_name, log_vf); + if (!is_gimple_val (ratio_name)) + { + var = create_tmp_var (TREE_TYPE (ni), "bnd"); + + stmts = NULL; + ratio_name = force_gimple_operand (ratio_name, &stmts, true, var); + gimple_seq_add_seq (cond_expr_stmt_list, stmts); + } + *ratio_name_ptr = ratio_name; + + /* Create: ratio_mult_vf = ratio << log2 (vf). */ + + if (ratio_mult_vf_name_ptr) + { + ratio_mult_vf_name = fold_build2 (LSHIFT_EXPR, TREE_TYPE (ratio_name), + ratio_name, log_vf); + if (!is_gimple_val (ratio_mult_vf_name)) + { + var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf"); + + stmts = NULL; + ratio_mult_vf_name = force_gimple_operand (ratio_mult_vf_name, &stmts, + true, var); + gimple_seq_add_seq (cond_expr_stmt_list, stmts); + } + *ratio_mult_vf_name_ptr = ratio_mult_vf_name; + } + + return; + } + + /* Function vect_transform_loop. The analysis phase has determined that the loop is vectorizable. *************** vect_transform_loop (loop_vec_info loop_ *** 5636,5646 **** } /* Peel the loop if there are data refs with unknown alignment. ! Only one data ref with unknown store is allowed. */ if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo)) { ! vect_do_peeling_for_alignment (loop_vinfo, th, check_profitability); check_profitability = false; } --- 5740,5757 ---- } /* Peel the loop if there are data refs with unknown alignment. ! Only one data ref with unknown store is allowed. ! 
This clobbers LOOP_VINFO_NITERS but retains the original ! in LOOP_VINFO_NITERS_UNCHANGED. So we cannot avoid re-computing ! niters. */ if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo)) { ! gimple_seq stmts = NULL; ! tree ni_name = vect_build_loop_niters (loop_vinfo, &stmts); ! gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts); ! vect_do_peeling_for_alignment (loop_vinfo, ni_name, ! th, check_profitability); check_profitability = false; } *************** vect_transform_loop (loop_vec_info loop_ *** 5655,5670 **** if ((int) tree_ctz (LOOP_VINFO_NITERS (loop_vinfo)) < exact_log2 (vectorization_factor) || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)) ! vect_do_peeling_for_loop_bound (loop_vinfo, &ratio, ! th, check_profitability); else if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)) ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)), LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor); else { ! tree ni_name, ratio_mult_vf; ! vect_generate_tmps_on_preheader (loop_vinfo, &ni_name, &ratio_mult_vf, ! &ratio, NULL); } /* 1) Make sure the loop header has exactly two entries --- 5766,5792 ---- if ((int) tree_ctz (LOOP_VINFO_NITERS (loop_vinfo)) < exact_log2 (vectorization_factor) || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)) ! { ! tree ni_name, ratio_mult_vf; ! gimple_seq stmts = NULL; ! ni_name = vect_build_loop_niters (loop_vinfo, &stmts); ! vect_generate_tmps_on_preheader (loop_vinfo, ni_name, &ratio_mult_vf, ! &ratio, &stmts); ! gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts); ! vect_do_peeling_for_loop_bound (loop_vinfo, ni_name, ratio_mult_vf, ! th, check_profitability); ! } else if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)) ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)), LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor); else { ! tree ni_name; ! gimple_seq stmts = NULL; ! ni_name = vect_build_loop_niters (loop_vinfo, &stmts); ! vect_generate_tmps_on_preheader (loop_vinfo, ni_name, NULL, ! 
&ratio, &stmts); ! gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts); } /* 1) Make sure the loop header has exactly two entries Index: gcc/tree-vectorizer.h =================================================================== *** gcc/tree-vectorizer.h (revision 205118) --- gcc/tree-vectorizer.h (working copy) *************** extern void slpeel_make_loop_iterate_nti *** 937,947 **** extern bool slpeel_can_duplicate_loop_p (const struct loop *, const_edge); struct loop *slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *, edge); extern void vect_loop_versioning (loop_vec_info, unsigned int, bool); ! extern void vect_generate_tmps_on_preheader (loop_vec_info, tree *, tree *, ! tree *, gimple_seq); ! extern void vect_do_peeling_for_loop_bound (loop_vec_info, tree *, unsigned int, bool); ! extern void vect_do_peeling_for_alignment (loop_vec_info, unsigned int, bool); extern LOC find_loop_location (struct loop *); extern bool vect_can_advance_ivs_p (loop_vec_info); --- 937,946 ---- extern bool slpeel_can_duplicate_loop_p (const struct loop *, const_edge); struct loop *slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *, edge); extern void vect_loop_versioning (loop_vec_info, unsigned int, bool); ! extern void vect_do_peeling_for_loop_bound (loop_vec_info, tree, tree, unsigned int, bool); ! extern void vect_do_peeling_for_alignment (loop_vec_info, tree, ! unsigned int, bool); extern LOC find_loop_location (struct loop *); extern bool vect_can_advance_ivs_p (loop_vec_info);