Hi,
I have attached revamped version of Kugan's patch
(https://gcc.gnu.org/ml/gcc/2013-06/msg00100.html) for PR43721.
Test-case: http://pastebin.com/QMfpXLD9
divmod pass dump: http://pastebin.com/yMY1ikCp
Assembly: http://pastebin.com/kk2HZpvA
The approach I took is similar to sincos pass, which involves two parts:
a) divmod pass that transforms:
t1 = a / b;
t2 = a % b;
to the following sequence:
complex_tmp = DIVMOD (a, b);
t1 = REALPART_EXPR(a);
t2 = IMAGPART_EXPR(b);
b) DIVMOD(a, b) is represented as an internal-fn and is expanded by
expand_DIVMOD().
I am not sure if I have done this correctly. I was somehow looking to
reuse expand_divmod() but am not able to figure out how to do that
(AFAIU expand_divmod() strictly returns either the quotient or
remainder but never both which is what I want), so ended up with
manually expanding to rtx.
While going through the sincos pass in execute_cse_sincos_1, I didn't
understand the following:
if (gimple_purge_dead_eh_edges (gimple_bb (stmt)))
cfg_changed = true;
Um why is the call to gimple_purge_dead_eh_edges necessary here?
A silly question, what options to pass to gcc to print statistics ? I
added statistics_counter_event to keep track of number of calls
inserted/used but not sure how to print them :/
I would be grateful for suggestions for improving the patch.
Thank you,
Prathamesh
2015-10-30 Prathamesh Kulkarni <[email protected]>
Kugan Vivekanandarajah <[email protected]>
* internal-fn.c (expand_DIVMOD): New internal function.
* internal-fn.def: Add entry for expand_DIVMOD.
* passes.def (pass_divmod): New pass.
* tree-pass.h (make_pass_divmod): Declare.
* tree-ssa-math-opts.c: Implement pass_divmod.
diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
index f12d3af..5fd95f2 100644
--- a/gcc/internal-fn.c
+++ b/gcc/internal-fn.c
@@ -1958,6 +1958,72 @@ expand_VA_ARG (gcall *stmt ATTRIBUTE_UNUSED)
gcc_unreachable ();
}
+static void
+expand_DIVMOD (gcall *stmt)
+{
+ tree lhs = gimple_call_lhs (stmt);
+ tree arg0 = gimple_call_arg (stmt, 0);
+ tree arg1 = gimple_call_arg (stmt, 1);
+
+ gcc_assert (TREE_CODE (TREE_TYPE (lhs)) == COMPLEX_TYPE);
+ tree type = TREE_TYPE (TREE_TYPE (lhs));
+ machine_mode mode = TYPE_MODE (type);
+ optab tab = (TYPE_UNSIGNED (type)) ? udivmod_optab : sdivmod_optab;
+
+ rtx op0 = expand_normal (arg0);
+ rtx op1 = expand_normal (arg1);
+ rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
+
+ /* Check if optab handler exists for udivmod/sdivmod. */
+ if (optab_handler (tab, mode) != CODE_FOR_nothing)
+ {
+ rtx quotient = gen_reg_rtx (mode);
+ rtx remainder = gen_reg_rtx (mode);
+ expand_twoval_binop (tab, op0, op1, quotient, remainder, TYPE_UNSIGNED
(type));
+
+ /* Wrap the return value (quotient, remaineder) within COMPLEX_EXPR */
+ expand_expr (build2 (COMPLEX_EXPR, TREE_TYPE (lhs),
+ make_tree (TREE_TYPE (arg0), quotient),
+ make_tree (TREE_TYPE (arg1), remainder)),
+ target, VOIDmode, EXPAND_NORMAL);
+
+ return;
+ }
+
+ rtx libfunc = NULL_RTX;
+
+ machine_mode compute_mode;
+ for (compute_mode = mode;
+ compute_mode != VOIDmode;
+ compute_mode = GET_MODE_WIDER_MODE (compute_mode))
+ {
+ libfunc = optab_libfunc (tab, compute_mode);
+ if (libfunc != NULL_RTX)
+ break;
+ }
+
+ gcc_assert (libfunc != NULL_RTX);
+
+ if (compute_mode != mode)
+ {
+ op0 = convert_modes (compute_mode, GET_MODE (op0), op0, tab);
+ op1 = convert_modes (compute_mode, GET_MODE (op1), op1, tab);
+ }
+
+ machine_mode libval_mode = smallest_mode_for_size (2 * GET_MODE_BITSIZE
(compute_mode), MODE_INT);
+ rtx libval = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
+ libval_mode, 2, op0, compute_mode, op1,
compute_mode);
+
+ rtx quotient = simplify_gen_subreg (mode, libval, libval_mode, 0);
+ rtx remainder = simplify_gen_subreg (mode, libval, libval_mode,
GET_MODE_SIZE (compute_mode));
+
+ /* Wrap the return value (quotient, remaineder) within COMPLEX_EXPR */
+ expand_expr (build2 (COMPLEX_EXPR, TREE_TYPE (lhs),
+ make_tree (TREE_TYPE (arg0), quotient),
+ make_tree (TREE_TYPE (arg1), remainder)),
+ target, VOIDmode, EXPAND_NORMAL);
+}
+
/* Routines to expand each internal function, indexed by function number.
Each routine has the prototype:
diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index 305cf1b..1d6fbab 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -65,3 +65,4 @@ DEF_INTERNAL_FN (SUB_OVERFLOW, ECF_CONST | ECF_LEAF |
ECF_NOTHROW, NULL)
DEF_INTERNAL_FN (MUL_OVERFLOW, ECF_CONST | ECF_LEAF | ECF_NOTHROW, NULL)
DEF_INTERNAL_FN (TSAN_FUNC_EXIT, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL)
DEF_INTERNAL_FN (VA_ARG, ECF_NOTHROW | ECF_LEAF, NULL)
+DEF_INTERNAL_FN (DIVMOD, ECF_CONST | ECF_LEAF, NULL)
diff --git a/gcc/passes.def b/gcc/passes.def
index dc3f44c..896623a 100644
--- a/gcc/passes.def
+++ b/gcc/passes.def
@@ -272,6 +272,7 @@ along with GCC; see the file COPYING3. If not see
NEXT_PASS (pass_simduid_cleanup);
NEXT_PASS (pass_lower_vector_ssa);
NEXT_PASS (pass_cse_reciprocals);
+ NEXT_PASS (pass_divmod);
NEXT_PASS (pass_reassoc);
NEXT_PASS (pass_strength_reduction);
NEXT_PASS (pass_tracer);
diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h
index c37e4b2..643acb3 100644
--- a/gcc/tree-pass.h
+++ b/gcc/tree-pass.h
@@ -421,6 +421,7 @@ extern gimple_opt_pass *make_pass_cse_reciprocals
(gcc::context *ctxt);
extern gimple_opt_pass *make_pass_cse_sincos (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_optimize_bswap (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_optimize_widening_mul (gcc::context *ctxt);
+extern gimple_opt_pass *make_pass_divmod (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_warn_function_return (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_warn_function_noreturn (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_cselim (gcc::context *ctxt);
diff --git a/gcc/tree-ssa-math-opts.c b/gcc/tree-ssa-math-opts.c
index 9223642..1b414e4 100644
--- a/gcc/tree-ssa-math-opts.c
+++ b/gcc/tree-ssa-math-opts.c
@@ -3630,3 +3630,231 @@ make_pass_optimize_widening_mul (gcc::context *ctxt)
{
return new pass_optimize_widening_mul (ctxt);
}
+
+namespace {
+
+struct divmod_stats_
+{
+ int n_calls;
+ int n_used;
+
+ divmod_stats_ (): n_calls (0), n_used (0) {}
+} divmod_stats;
+
+const pass_data pass_data_divmod =
+{
+ GIMPLE_PASS, /* type */
+ "divmod", /* name */
+ OPTGROUP_NONE, /* optinfo_flags */
+ TV_NONE, /* tv_id */
+ PROP_ssa, /* properties_required */
+ 0, /* properties_provided */
+ 0, /* properties_destroyed */
+ 0, /* todo_flags_start */
+ TODO_update_ssa, /* todo_flags_finish */
+};
+
+class pass_divmod : public gimple_opt_pass
+{
+public:
+ pass_divmod (gcc::context *ctxt)
+ : gimple_opt_pass (pass_data_divmod, ctxt)
+ {}
+
+ /* opt_pass methods: */
+ virtual bool gate (function *)
+ {
+ return optimize;
+ }
+
+ virtual unsigned int execute (function *);
+
+private:
+ unsigned execute_divmod_1 (gimple *);
+ void maybe_record_stmt (vec<gimple *>& stmts, basic_block& top_bb, gimple
*use_stmt);
+ gimple_stmt_iterator get_later_stmt (const basic_block& top_bb, gimple
*stmt1, gimple *stmt2);
+
+}; // class pass_divmod
+
+void
+pass_divmod::maybe_record_stmt (vec<gimple *>& stmts, basic_block& top_bb,
gimple *use_stmt)
+{
+ /* FIXME: Use same condition for adding use_stmt to vector as in sincos ? */
+ maybe_record_sincos (&stmts, &top_bb, use_stmt);
+}
+
+/* Return gsi for stmt that occurs later. */
+
+gimple_stmt_iterator
+pass_divmod::get_later_stmt (const basic_block& top_bb, gimple *stmt1, gimple
*stmt2)
+{
+ gimple_stmt_iterator gsi;
+
+ for (gsi = gsi_after_labels (top_bb); !gsi_end_p (gsi); gsi_next (&gsi))
+ if (gsi_stmt (gsi) == stmt1)
+ return gsi_for_stmt (stmt2);
+ else if (gsi_stmt (gsi) == stmt2)
+ return gsi_for_stmt (stmt1);
+
+ gcc_unreachable ();
+}
+
+/* Pass operates in two phases:
+ * a) Collect all stmts with TRUNC_DIV_EXPR/TRUNC_MOD_EXPR having same
operands into vec stmts;
+ * b) Create divmod library call, and update statements in stmts vector to use
the divmod's return value.
+ */
+
+unsigned
+pass_divmod::execute_divmod_1 (gimple *stmt)
+{
+ enum tree_code code = (gimple_assign_rhs_code (stmt) == TRUNC_DIV_EXPR) ?
TRUNC_MOD_EXPR : TRUNC_DIV_EXPR;
+
+ vec<gimple *> stmts = vNULL;
+ stmts.safe_push (stmt);
+ basic_block top_bb = gimple_bb (stmt);
+
+ tree op1 = gimple_assign_rhs1 (stmt);
+ tree op2 = gimple_assign_rhs2 (stmt);
+
+ use_operand_p use_p;
+ imm_use_iterator use_iter;
+
+ FOR_EACH_IMM_USE_FAST (use_p, use_iter, op1)
+ {
+ gimple *use_stmt = USE_STMT (use_p);
+ if (is_gimple_assign (use_stmt)
+ && gimple_assign_rhs_code (use_stmt) == code)
+ {
+ tree u_op1 = gimple_assign_rhs1 (use_stmt);
+ tree u_op2 = gimple_assign_rhs2 (use_stmt);
+
+ if ((operand_equal_p (op1, u_op1, 0) && operand_equal_p (op2, u_op2,
0))
+ || (operand_equal_p (op1, u_op2, 0) && operand_equal_p (op2,
u_op1, 0)))
+ maybe_record_stmt (stmts, top_bb, use_stmt);
+ }
+ }
+
+ if (stmts.is_empty ())
+ return false;
+
+ /* Create the library call. */
+ gcall *call_stmt = gimple_build_call_internal (IFN_DIVMOD, 2, op1, op2);
+ tree res = make_temp_ssa_name (build_complex_type (TREE_TYPE
(gimple_assign_lhs (stmt))), call_stmt, "divmod_tmp");
+ gimple_call_set_lhs (call_stmt, res);
+
+ divmod_stats.n_calls++;
+
+ /* Insert the call-stmt. */
+ gimple *def_stmt_op1 = SSA_NAME_DEF_STMT (op1);
+ bool op1_def_in_bb =
+ (!SSA_NAME_IS_DEFAULT_DEF (op1)
+ && gimple_code (def_stmt_op1) != GIMPLE_PHI
+ && gimple_bb (def_stmt_op1) == top_bb);
+
+ gimple *def_stmt_op2 = SSA_NAME_DEF_STMT (op2);
+ bool op2_def_in_bb =
+ (!SSA_NAME_IS_DEFAULT_DEF (op2)
+ && gimple_code (def_stmt_op2) != GIMPLE_PHI
+ && gimple_bb (def_stmt_op2) == top_bb);
+
+ /* 3 cases arise where to insert the call to divmod depending upon where op1
and op2 are defined wrt top_bb:
+ * Case 1: When both def are outside top_bb, simply insert after labels */
+ if (!op1_def_in_bb && !op2_def_in_bb)
+ {
+ gcc_assert (top_bb != 0);
+ gimple_stmt_iterator gsi = gsi_after_labels (top_bb);
+ gsi_insert_before (&gsi, call_stmt, GSI_SAME_STMT);
+ }
+
+ /* Case 2: When both are in top_bb */
+ else if (op1_def_in_bb && op2_def_in_bb)
+ {
+ /* FIXME: better way to compare iterators?. */
+ gimple_stmt_iterator gsi = get_later_stmt (top_bb, def_stmt_op1,
def_stmt_op2);
+ gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT);
+ }
+
+ /* Case 3: When one def is inside top_bb and one is outside. */
+ else
+ {
+ gimple_stmt_iterator gsi;
+
+ if (op1_def_in_bb)
+ gsi = gsi_for_stmt (def_stmt_op1);
+ else
+ gsi = gsi_for_stmt (def_stmt_op2);
+
+ gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT);
+ }
+
+ /* Update stmts. */
+ bool cfg_changed = false;
+ gimple *use_stmt;
+ for (unsigned i = 0; i < stmts.length (); ++i)
+ {
+ tree rhs;
+ use_stmt = stmts[i];
+
+ switch (gimple_assign_rhs_code (use_stmt))
+ {
+ case TRUNC_DIV_EXPR:
+ rhs = fold_build1 (REALPART_EXPR, TREE_TYPE (op1), res);
+ break;
+
+ case TRUNC_MOD_EXPR:
+ rhs = fold_build1 (IMAGPART_EXPR, TREE_TYPE (op1), res);
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ gassign *assign_stmt = gimple_build_assign (gimple_assign_lhs
(use_stmt), rhs);
+ gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
+ gsi_replace (&gsi, assign_stmt, true);
+ if (gimple_purge_dead_eh_edges (gimple_bb (assign_stmt)))
+ cfg_changed = true;
+
+ divmod_stats.n_used++;
+ }
+
+ stmts.release ();
+ return cfg_changed;
+}
+
+unsigned
+pass_divmod::execute (function *fun)
+{
+ calculate_dominance_info (CDI_DOMINATORS);
+ basic_block bb;
+
+ bool cfg_changed = false;
+
+ FOR_EACH_BB_FN (bb, fun)
+ {
+ for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi);
gsi_next (&gsi))
+ {
+ gimple *stmt = gsi_stmt (gsi);
+ if (is_gimple_assign (stmt)
+ && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_binary)
+ {
+ if (gimple_assign_rhs_code (stmt) == TRUNC_DIV_EXPR)
+ cfg_changed |= execute_divmod_1 (stmt);
+ }
+ }
+ }
+
+ statistics_counter_event (fun, "number of divmod calls inserted",
divmod_stats.n_calls);
+ statistics_counter_event (fun, "number of divmod calls used",
divmod_stats.n_used);
+
+ free_dominance_info (CDI_DOMINATORS);
+ return cfg_changed ? TODO_cleanup_cfg : 0;
+}
+
+} // anon namespace
+
+gimple_opt_pass *
+make_pass_divmod (gcc::context *ctxt)
+{
+ return new pass_divmod (ctxt);
+}