This re-organizes builtin detection and generation properly into an analysis and transform phase (noting that when we fail late we will generate wrong code at the moment - eventually non-fatal, but at least it will have duplicate work in the left-over loop).
With this in place adding more kinds of builtins to detect should not be any rocket science anymore. Bootstrap and regtest pending on x86_64-unknown-linux-gnu. Richard. 2012-05-31 Richard Guenther <rguent...@suse.de> * tree-loop-distribution.c (enum partition_kind): New enum. (struct partition_s): Add kind and main_stmt members. (partition_alloc): Initialize kind to PKIND_NORMAL. (partition_builtin_p): New function. (copy_loop_before): Remove failure path and assert instead. (generate_loops_for_partition): Likewise. (generate_memset_zero): Fold into ... (generate_memset_builtin): ... this. (classify_partition): New function with code from can_generate_builtin and generate_builtin. (generate_builtin): Remove. (can_generate_builtin): Likewise. (fuse_partitions_with_similar_memory_accesses): Call partition_builtin_p instead of can_generate_builtin. (rdg_build_partitions): Do not call fuse_partitions_with_similar_memory_accesses here... (ldist_gen): ... but here after classifying all partitions. Remove failure path of generate_code_for_partition. (generate_code_for_partition): Generate code according to partition classification. Index: trunk/gcc/tree-loop-distribution.c =================================================================== *** trunk.orig/gcc/tree-loop-distribution.c 2012-05-31 15:58:12.000000000 +0200 --- trunk/gcc/tree-loop-distribution.c 2012-05-31 16:26:17.255459534 +0200 *************** along with GCC; see the file COPYING3. *** 52,60 **** --- 52,65 ---- #include "tree-scalar-evolution.h" #include "tree-pass.h" + enum partition_kind { PKIND_NORMAL, PKIND_MEMSET }; + typedef struct partition_s { bitmap stmts; + enum partition_kind kind; + /* Main statement a kind != PKIND_NORMAL partition is about. */ + gimple main_stmt; } *partition_t; DEF_VEC_P (partition_t); *************** partition_alloc (bitmap stmts) *** 67,72 **** --- 72,78 ---- { partition_t partition = XCNEW (struct partition_s); partition->stmts = stmts ? stmts : BITMAP_ALLOC (NULL); + partition->kind = PKIND_NORMAL; return partition; } *************** partition_free (partition_t partition) *** 79,84 **** --- 85,97 ---- free (partition); } + /* Returns true if the partition can be generated as a builtin. */ + + static bool + partition_builtin_p (partition_t partition) + { + return partition->kind != PKIND_NORMAL; + } /* If bit I is not set, it means that this node represents an operation that has already been performed, and that should not be *************** copy_loop_before (struct loop *loop) *** 183,198 **** struct loop *res; edge preheader = loop_preheader_edge (loop); - if (!single_exit (loop)) - return NULL; - initialize_original_copy_tables (); res = slpeel_tree_duplicate_loop_to_edge_cfg (loop, preheader); free_original_copy_tables (); - if (!res) - return NULL; - update_phis_for_loop_copy (loop, res); rename_variables_in_loop (res); --- 196,206 ---- struct loop *res; edge preheader = loop_preheader_edge (loop); initialize_original_copy_tables (); res = slpeel_tree_duplicate_loop_to_edge_cfg (loop, preheader); + gcc_assert (res != NULL); free_original_copy_tables (); update_phis_for_loop_copy (loop, res); rename_variables_in_loop (res); *************** create_bb_after_loop (struct loop *loop) *** 216,225 **** copied when COPY_P is true. All the statements not flagged in the PARTITION bitmap are removed from the loop or from its copy. The statements are indexed in sequence inside a basic block, and the ! basic blocks of a loop are taken in dom order. Returns true when ! the code gen succeeded. */ ! static bool generate_loops_for_partition (struct loop *loop, partition_t partition, bool copy_p) { --- 224,232 ---- copied when COPY_P is true. All the statements not flagged in the PARTITION bitmap are removed from the loop or from its copy. The statements are indexed in sequence inside a basic block, and the ! basic blocks of a loop are taken in dom order. */ ! static void generate_loops_for_partition (struct loop *loop, partition_t partition, bool copy_p) { *************** generate_loops_for_partition (struct loo *** 230,242 **** if (copy_p) { loop = copy_loop_before (loop); create_preheader (loop, CP_SIMPLE_PREHEADERS); create_bb_after_loop (loop); } - if (loop == NULL) - return false; - /* Remove stmts not in the PARTITION bitmap. The order in which we visit the phi nodes and the statements is exactly as in stmts_from_loop. */ --- 237,247 ---- if (copy_p) { loop = copy_loop_before (loop); + gcc_assert (loop != NULL); create_preheader (loop, CP_SIMPLE_PREHEADERS); create_bb_after_loop (loop); } /* Remove stmts not in the PARTITION bitmap. The order in which we visit the phi nodes and the statements is exactly as in stmts_from_loop. */ *************** generate_loops_for_partition (struct loo *** 293,299 **** } free (bbs); - return true; } /* Build the size argument for a memset call. */ --- 298,303 ---- *************** build_size_arg_loc (location_t loc, tree *** 313,331 **** return x; } ! /* Generate a call to memset. Return true when the operation succeeded. */ static void ! generate_memset_zero (gimple stmt, tree op0, tree nb_iter, ! gimple_stmt_iterator bsi) { ! tree addr_base, nb_bytes; ! bool res = false; gimple_seq stmt_list = NULL, stmts; - gimple fn_call; - tree mem, fn; struct data_reference *dr = XCNEW (struct data_reference); ! location_t loc = gimple_location (stmt); DR_STMT (dr) = stmt; DR_REF (dr) = op0; --- 317,345 ---- return x; } ! /* Generate a call to memset for PARTITION in LOOP. */ static void ! generate_memset_builtin (struct loop *loop, partition_t partition) { ! gimple_stmt_iterator gsi; ! gimple stmt, fn_call; ! tree op0, nb_iter, mem, fn, addr_base, nb_bytes; gimple_seq stmt_list = NULL, stmts; struct data_reference *dr = XCNEW (struct data_reference); ! location_t loc; ! bool res; ! ! stmt = partition->main_stmt; ! loc = gimple_location (stmt); ! op0 = gimple_assign_lhs (stmt); ! if (gimple_bb (stmt) == loop->latch) ! nb_iter = number_of_latch_executions (loop); ! else ! nb_iter = number_of_exit_cond_executions (loop); ! ! /* The new statements will be placed before LOOP. */ ! gsi = gsi_last_bb (loop_preheader_edge (loop)->src); DR_STMT (dr) = stmt; DR_REF (dr) = op0; *************** generate_memset_zero (gimple stmt, tree *** 353,359 **** fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET)); fn_call = gimple_build_call (fn, 3, mem, integer_zero_node, nb_bytes); gimple_seq_add_stmt (&stmt_list, fn_call); ! gsi_insert_seq_after (&bsi, stmt_list, GSI_CONTINUE_LINKING); if (dump_file && (dump_flags & TDF_DETAILS)) fprintf (dump_file, "generated memset zero\n"); --- 367,373 ---- fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET)); fn_call = gimple_build_call (fn, 3, mem, integer_zero_node, nb_bytes); gimple_seq_add_stmt (&stmt_list, fn_call); ! gsi_insert_seq_after (&gsi, stmt_list, GSI_CONTINUE_LINKING); if (dump_file && (dump_flags & TDF_DETAILS)) fprintf (dump_file, "generated memset zero\n"); *************** generate_memset_zero (gimple stmt, tree *** 361,473 **** free_data_ref (dr); } ! /* Tries to generate a builtin function for the instructions of LOOP ! pointed to by the bits set in PARTITION. Returns true when the ! operation succeeded. */ ! static bool ! generate_builtin (struct loop *loop, partition_t partition, bool copy_p) { ! bool res = false; ! unsigned i, x = 0; basic_block *bbs; ! gimple write = NULL; ! gimple_stmt_iterator bsi; ! tree nb_iter = number_of_exit_cond_executions (loop); ! ! if (!nb_iter || nb_iter == chrec_dont_know) ! return false; bbs = get_loop_body_in_dom_order (loop); ! for (i = 0; i < loop->num_nodes; i++) ! { ! basic_block bb = bbs[i]; ! ! for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi)) ! x++; ! ! for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi)) ! { ! gimple stmt = gsi_stmt (bsi); ! ! if (gimple_code (stmt) == GIMPLE_LABEL ! || is_gimple_debug (stmt)) ! continue; ! ! if (!bitmap_bit_p (partition->stmts, x++)) ! continue; ! ! /* If the stmt has uses outside of the loop fail. ! ??? If the stmt is generated in another partition that ! is not created as builtin we can ignore this. */ ! if (stmt_has_scalar_dependences_outside_loop (loop, stmt)) ! { ! if (dump_file && (dump_flags & TDF_DETAILS)) ! fprintf (dump_file, "not generating builtin, partition has " ! "scalar uses outside of the loop\n"); ! goto end; ! } ! ! if (is_gimple_assign (stmt) ! && !is_gimple_reg (gimple_assign_lhs (stmt))) ! { ! /* Don't generate the builtins when there are more than ! one memory write. */ ! if (write != NULL) ! goto end; ! ! write = stmt; ! if (bb == loop->latch) ! nb_iter = number_of_latch_executions (loop); ! } ! } ! } ! ! if (!stmt_with_adjacent_zero_store_dr_p (write)) ! goto end; ! ! /* The new statements will be placed before LOOP. */ ! bsi = gsi_last_bb (loop_preheader_edge (loop)->src); ! generate_memset_zero (write, gimple_assign_lhs (write), nb_iter, bsi); ! res = true; ! ! /* If this is the last partition for which we generate code, we have ! to destroy the loop. */ ! if (!copy_p) ! { ! unsigned nbbs = loop->num_nodes; ! edge exit = single_exit (loop); ! basic_block src = loop_preheader_edge (loop)->src, dest = exit->dest; ! redirect_edge_pred (exit, src); ! exit->flags &= ~(EDGE_TRUE_VALUE|EDGE_FALSE_VALUE); ! exit->flags |= EDGE_FALLTHRU; ! cancel_loop_tree (loop); ! rescan_loop_exit (exit, false, true); ! ! for (i = 0; i < nbbs; i++) ! delete_basic_block (bbs[i]); ! set_immediate_dominator (CDI_DOMINATORS, dest, ! recompute_dominator (CDI_DOMINATORS, dest)); ! } ! ! end: free (bbs); ! return res; } ! /* Generates code for PARTITION. For simple loops, this function can ! generate a built-in. */ ! static bool generate_code_for_partition (struct loop *loop, partition_t partition, bool copy_p) { ! if (generate_builtin (loop, partition, copy_p)) ! return true; ! return generate_loops_for_partition (loop, partition, copy_p); } --- 375,430 ---- free_data_ref (dr); } ! /* Remove and destroy the loop LOOP. */ ! static void ! destroy_loop (struct loop *loop) { ! unsigned nbbs = loop->num_nodes; ! edge exit = single_exit (loop); ! basic_block src = loop_preheader_edge (loop)->src, dest = exit->dest; basic_block *bbs; ! unsigned i; bbs = get_loop_body_in_dom_order (loop); ! redirect_edge_pred (exit, src); ! exit->flags &= ~(EDGE_TRUE_VALUE|EDGE_FALSE_VALUE); ! exit->flags |= EDGE_FALLTHRU; ! cancel_loop_tree (loop); ! rescan_loop_exit (exit, false, true); ! for (i = 0; i < nbbs; i++) ! delete_basic_block (bbs[i]); free (bbs); ! ! set_immediate_dominator (CDI_DOMINATORS, dest, ! recompute_dominator (CDI_DOMINATORS, dest)); } ! /* Generates code for PARTITION. */ ! static void generate_code_for_partition (struct loop *loop, partition_t partition, bool copy_p) { ! switch (partition->kind) ! { ! case PKIND_MEMSET: ! generate_memset_builtin (loop, partition); ! /* If this is the last partition for which we generate code, we have ! to destroy the loop. */ ! if (!copy_p) ! destroy_loop (loop); ! break; ! ! case PKIND_NORMAL: ! generate_loops_for_partition (loop, partition, copy_p); ! break; ! default: ! gcc_unreachable (); ! } } *************** rdg_build_components (struct graph *rdg, *** 828,859 **** BITMAP_FREE (saved_components); } ! /* Returns true when it is possible to generate a builtin pattern for ! the PARTITION of RDG. For the moment we detect only the memset ! zero pattern. */ ! static bool ! can_generate_builtin (struct graph *rdg, partition_t partition) { - unsigned i; bitmap_iterator bi; ! int nb_reads = 0; ! int nb_writes = 0; ! int stores_zero = 0; EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi) ! if (RDG_MEM_READS_STMT (rdg, i)) ! nb_reads++; ! else if (RDG_MEM_WRITE_STMT (rdg, i)) ! { ! gimple stmt = RDG_STMT (rdg, i); ! nb_writes++; ! if (!gimple_has_volatile_ops (stmt) ! && stmt_with_adjacent_zero_store_dr_p (stmt)) ! stores_zero++; ! } ! return stores_zero == 1 && nb_writes == 1 && nb_reads == 0; } /* Returns true when PARTITION1 and PARTITION2 have similar memory --- 785,855 ---- BITMAP_FREE (saved_components); } ! /* Classifies the builtin kind we can generate for PARTITION of RDG and LOOP. ! For the moment we detect only the memset zero pattern. */ ! static void ! classify_partition (loop_p loop, struct graph *rdg, partition_t partition) { bitmap_iterator bi; ! unsigned i; ! tree nb_iter; ! ! partition->kind = PKIND_NORMAL; ! partition->main_stmt = NULL; ! ! /* Perform general partition disqualification for builtins. */ ! nb_iter = number_of_exit_cond_executions (loop); ! if (!nb_iter || nb_iter == chrec_dont_know) ! return; EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi) ! { ! gimple stmt = RDG_STMT (rdg, i); ! ! if (gimple_has_volatile_ops (stmt)) ! return; ! /* If the stmt has uses outside of the loop fail. ! ??? If the stmt is generated in another partition that ! is not created as builtin we can ignore this. */ ! if (gimple_code (stmt) != GIMPLE_PHI ! && stmt_has_scalar_dependences_outside_loop (loop, stmt)) ! { ! if (dump_file && (dump_flags & TDF_DETAILS)) ! fprintf (dump_file, "not generating builtin, partition has " ! "scalar uses outside of the loop\n"); ! return; ! } ! } ! ! /* Detect memset. */ ! EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi) ! { ! gimple stmt = RDG_STMT (rdg, i); ! ! if (gimple_code (stmt) == GIMPLE_PHI) ! continue; ! ! /* Any scalar stmts are ok. */ ! if (!gimple_vuse (stmt)) ! continue; ! ! /* Exactly one store. */ ! if (gimple_assign_single_p (stmt) ! && !is_gimple_reg (gimple_assign_lhs (stmt))) ! { ! if (partition->main_stmt != NULL) ! return; ! partition->main_stmt = stmt; ! } ! else ! return; ! } ! ! if (partition->main_stmt != NULL ! && stmt_with_adjacent_zero_store_dr_p (partition->main_stmt)) ! partition->kind = PKIND_MEMSET; } /* Returns true when PARTITION1 and PARTITION2 have similar memory *************** fuse_partitions_with_similar_memory_acce *** 891,900 **** partition_t partition1, partition2; FOR_EACH_VEC_ELT (partition_t, *partitions, p1, partition1) ! if (!can_generate_builtin (rdg, partition1)) FOR_EACH_VEC_ELT (partition_t, *partitions, p2, partition2) if (p1 != p2 ! && !can_generate_builtin (rdg, partition2) && similar_memory_accesses (rdg, partition1, partition2)) { bitmap_ior_into (partition1->stmts, partition2->stmts); --- 887,896 ---- partition_t partition1, partition2; FOR_EACH_VEC_ELT (partition_t, *partitions, p1, partition1) ! if (!partition_builtin_p (partition1)) FOR_EACH_VEC_ELT (partition_t, *partitions, p2, partition2) if (p1 != p2 ! && !partition_builtin_p (partition2) && similar_memory_accesses (rdg, partition1, partition2)) { bitmap_ior_into (partition1->stmts, partition2->stmts); *************** rdg_build_partitions (struct graph *rdg, *** 971,978 **** VEC_safe_push (partition_t, heap, *partitions, partition); else partition_free (partition); - - fuse_partitions_with_similar_memory_accesses (rdg, partitions); } /* Dump to FILE the PARTITIONS. */ --- 967,972 ---- *************** ldist_gen (struct loop *loop, struct gra *** 1101,1111 **** processed); BITMAP_FREE (processed); nbp = VEC_length (partition_t, partitions); if (nbp == 0 || (nbp == 1 ! && !can_generate_builtin (rdg, ! VEC_index (partition_t, partitions, 0))) || (nbp > 1 && partition_contains_all_rw (rdg, partitions))) goto ldist_done; --- 1095,1109 ---- processed); BITMAP_FREE (processed); + FOR_EACH_VEC_ELT (partition_t, partitions, i, partition) + classify_partition (loop, rdg, partition); + + fuse_partitions_with_similar_memory_accesses (rdg, &partitions); + nbp = VEC_length (partition_t, partitions); if (nbp == 0 || (nbp == 1 ! && !partition_builtin_p (VEC_index (partition_t, partitions, 0))) || (nbp > 1 && partition_contains_all_rw (rdg, partitions))) goto ldist_done; *************** ldist_gen (struct loop *loop, struct gra *** 1114,1121 **** dump_rdg_partitions (dump_file, partitions); FOR_EACH_VEC_ELT (partition_t, partitions, i, partition) ! if (!generate_code_for_partition (loop, partition, i < nbp - 1)) ! goto ldist_done; rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa); mark_sym_for_renaming (gimple_vop (cfun)); --- 1112,1118 ---- dump_rdg_partitions (dump_file, partitions); FOR_EACH_VEC_ELT (partition_t, partitions, i, partition) ! generate_code_for_partition (loop, partition, i < nbp - 1); rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa); mark_sym_for_renaming (gimple_vop (cfun));