On Tue, Aug 19, 2014 at 09:39:56PM +0100, Steve Ellcey wrote: > Here is an official submission for the switch optimization described in > PR 54742. I have addressed the formatting/comment issues that were raised > and also added a test case based on comment #27 from PR 54742 and I fixed a > bug I found while doing benchmarking with SPEC2006 (the perl benchmark was > generating an ICE in a routine with multiple switch statements). > > I ran the benchmarking to see if I could find any more tests that are > helped like coremark is and while I found a number of benchmarks in > SPEC 2006 and EEMBC where the optimization is triggered, this optimization > generally didn't affect the performance of those benchmarks. The biggest > impact I could find was on the perl benchmark in SPEC where I saw around > a 0.4% improvement on a MIPS 74k. Not huge, but not nothing.
For what it is worth, I see a nice (~4%) improvement in Crafty from SPEC 2000. I haven't investigated too deeply, but at a first glance the number of branch mispredictions has dropped just over 1%, as you might hope from this optimisation. I can also attest to there being a number of places the optimisation is triggered (with high enough parameters; I was running with --param max-switch-paths=1000 --param max-switch-insns=10000), but like you I don't see much measurable change in execution time. Thanks, James > > So, OK to checkin? > > Steve Ellcey > sell...@mips.com > > > 2014-08-12 Steve Ellcey <sell...@mips.com> > > PR tree-opt/54742 > * Makefile.in (OBJS): Add tree-switch-shortcut.o. > * common.opt (ftree-switch-shortcut): New. > * opts.c (default_options_table): Add OPT_ftree_switch_shortcut. > * params.def (PARAM_MAX_SWITCH_INSNS): New. > (PARAM_MAX_SWITCH_PATHS): New. > * passes.def (pass_tree_switch_shortcut): New. > * timevar.def (TV_TREE_SWITCH_SHORTCUT): New. > * tree-pass.h (make_pass_tree_switch_shortcut): New. > * tree-switch-shortcut.c: New. > > > 2014-08-12 Steve Ellcey <sell...@mips.com> > > PR tree-opt/54742 > * gcc.dg/pr54742.c: New test. > diff --git a/gcc/testsuite/gcc.dg/pr54742.c b/gcc/testsuite/gcc.dg/pr54742.c > new file mode 100644 > index 0000000..77aa8ba > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/pr54742.c > @@ -0,0 +1,50 @@ > +/* PR tree-optimization/54742 > + Verify that the tree-optimization-shortcut pass completely removes > + the switch statement. */ > + > +/* { dg-do compile } */ > +/* { dg-options "-O3" } */ > + > +int sum0, sum1, sum2, sum3; > +int foo(char * s, char** ret) > +{ > + int state=0; > + char c; > + > + for (; *s && state != 4; s++) > + { > + c = *s; > + if (c == '*') > + { > + s++; > + break; > + } > + switch (state) { > + case 0: > + if (c == '+') state = 1; > + else if (c != '-') sum0+=c; > + break; > + case 1: > + if (c == '+') state = 2; > + else if (c == '-') state = 0; > + else sum1+=c; > + break; > + case 2: > + if (c == '+') state = 3; > + else if (c == '-') state = 1; > + else sum2+=c; > + break; > + case 3: > + if (c == '-') state = 2; > + else if (c == 'x') state = 4; > + break; > + default: > + break; > + } > + } > + *ret = s; > + return state; > +} > + > +/* { dg-final { scan-tree-dump-not "switch" "optimized" } } */ > +/* { dg-final { cleanup-tree-dump "optimized" } } */ > diff --git a/gcc/Makefile.in b/gcc/Makefile.in > index 31c1f4d..94e8ec4 100644 > --- a/gcc/Makefile.in > +++ b/gcc/Makefile.in > @@ -1411,6 +1411,7 @@ OBJS = \ > tree-scalar-evolution.o \ > tree-sra.o \ > tree-switch-conversion.o \ > + tree-switch-shortcut.o \ > tree-ssa-address.o \ > tree-ssa-alias.o \ > tree-ssa-ccp.o \ > diff --git a/gcc/common.opt b/gcc/common.opt > index 0c4f86b..fe0664a 100644 > --- a/gcc/common.opt > +++ b/gcc/common.opt > @@ -2249,6 +2249,10 @@ ftree-sra > Common Report Var(flag_tree_sra) Optimization > Perform scalar replacement of aggregates > > +ftree-switch-shortcut > +Common Report Var(flag_tree_switch_shortcut) Init(0) Optimization > +Convert jumps to switch statements into jumps to case statement. > + > ftree-ter > Common Report Var(flag_tree_ter) Optimization > Replace temporary expressions in the SSA->normal pass > diff --git a/gcc/opts.c b/gcc/opts.c > index be1867c..f1ac2e5 100644 > --- a/gcc/opts.c > +++ b/gcc/opts.c > @@ -514,6 +514,7 @@ static const struct default_options > default_options_table[] = > { OPT_LEVELS_3_PLUS, OPT_fvect_cost_model_, NULL, > VECT_COST_MODEL_DYNAMIC }, > { OPT_LEVELS_3_PLUS, OPT_fipa_cp_clone, NULL, 1 }, > { OPT_LEVELS_3_PLUS, OPT_ftree_partial_pre, NULL, 1 }, > + { OPT_LEVELS_3_PLUS, OPT_ftree_switch_shortcut, NULL, 1 }, > > /* -Ofast adds optimizations to -O3. */ > { OPT_LEVELS_FAST, OPT_ffast_math, NULL, 1 }, > diff --git a/gcc/params.def b/gcc/params.def > index cad00e2..65377d3 100644 > --- a/gcc/params.def > +++ b/gcc/params.def > @@ -1058,6 +1058,20 @@ DEFPARAM (PARAM_MAX_SLSR_CANDIDATE_SCAN, > "strength reduction", > 50, 1, 999999) > > +/* Maximum number of instructions to duplicate when shortcutting a switch. > */ > +DEFPARAM (PARAM_MAX_SWITCH_INSNS, > + "max-switch-insns", > + "Maximum number of instructions to duplicate when " > + "shortcutting a switch statement", > + 100, 1, 999999) > + > +/* Maximum number of paths to duplicate when shortcutting a switch. */ > +DEFPARAM (PARAM_MAX_SWITCH_PATHS, > + "max-switch-paths", > + "Maximum number of new paths to create when" > + " shortcutting a switch statement", > + 50, 1, 999999) > + > DEFPARAM (PARAM_ASAN_STACK, > "asan-stack", > "Enable asan stack protection", > diff --git a/gcc/passes.def b/gcc/passes.def > index f13df6c..8bbf2d0 100644 > --- a/gcc/passes.def > +++ b/gcc/passes.def > @@ -157,6 +157,7 @@ along with GCC; see the file COPYING3. If not see > NEXT_PASS (pass_cselim); > NEXT_PASS (pass_copy_prop); > NEXT_PASS (pass_tree_ifcombine); > + NEXT_PASS (pass_tree_switch_shortcut); > NEXT_PASS (pass_phiopt); > NEXT_PASS (pass_tail_recursion); > NEXT_PASS (pass_ch); > diff --git a/gcc/timevar.def b/gcc/timevar.def > index a04d05c..d9ee915 100644 > --- a/gcc/timevar.def > +++ b/gcc/timevar.def > @@ -170,6 +170,7 @@ DEFTIMEVAR (TV_TREE_LOOP_IVCANON , "tree canonical > iv") > DEFTIMEVAR (TV_SCEV_CONST , "scev constant prop") > DEFTIMEVAR (TV_TREE_LOOP_UNSWITCH , "tree loop unswitching") > DEFTIMEVAR (TV_COMPLETE_UNROLL , "complete unrolling") > +DEFTIMEVAR (TV_TREE_SWITCH_SHORTCUT , "switch statement shortcuts") > DEFTIMEVAR (TV_TREE_PARALLELIZE_LOOPS, "tree parallelize loops") > DEFTIMEVAR (TV_TREE_VECTORIZATION , "tree vectorization") > DEFTIMEVAR (TV_TREE_SLP_VECTORIZATION, "tree slp vectorization") > diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h > index 1477d1f..f898e27 100644 > --- a/gcc/tree-pass.h > +++ b/gcc/tree-pass.h > @@ -575,6 +575,7 @@ extern gimple_opt_pass *make_pass_early_inline > (gcc::context *ctxt); > extern gimple_opt_pass *make_pass_inline_parameters (gcc::context *ctxt); > extern gimple_opt_pass *make_pass_update_address_taken (gcc::context *ctxt); > extern gimple_opt_pass *make_pass_convert_switch (gcc::context *ctxt); > +extern gimple_opt_pass *make_pass_tree_switch_shortcut (gcc::context *ctxt); > > /* Current optimization pass. */ > extern opt_pass *current_pass; > diff --git a/gcc/tree-switch-shortcut.c b/gcc/tree-switch-shortcut.c > new file mode 100644 > index 0000000..4518f79 > --- /dev/null > +++ b/gcc/tree-switch-shortcut.c > @@ -0,0 +1,438 @@ > +/* Switch shortcutting optimization for GNU C > + Copyright (C) 2013 Free Software Foundation, Inc. > + Contributed by Steve Ellcey (steve.ell...@imgtec.com). > + > +This file is part of GCC. > + > +GCC is free software; you can redistribute it and/or modify it under > +the terms of the GNU General Public License as published by the Free > +Software Foundation; either version 3, or (at your option) any later > +version. > + > +GCC is distributed in the hope that it will be useful, but WITHOUT ANY > +WARRANTY; without even the implied warranty of MERCHANTABILITY or > +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License > +for more details. > + > +You should have received a copy of the GNU General Public License > +along with GCC; see the file COPYING3. If not see > +<http://www.gnu.org/licenses/>. */ > + > +/* This file implements an optimization where, when a variable is set > + to a constant value and there is a path that leads from that definition > + to a switch statement that uses that variable as its controlling > expression > + we duplicate the blocks on this path and change the jump to the switch > + statement with a direct jump to the label of the switch block that control > + would goto based on the value of the variable. This can come up in > + loops/switch statements that implement state machines. > + > + Example (modified from PR 54742): > + > + foo(char *str) { > + int sum=0; > + int state=0; > + char *s=str; > + for (; *s; s++) { > + char c=*s; > + <CODE BLOCK 1> > + switch (state) { > + case 0: > + if (c == '+') { state = 1; sum += 9; } > + else if (c != '-') { state = 2; sum += 3; } > + break; > + case 1: > + if (c == '+') { state = 2; sum += 4; } > + else if (c == '-') { state = 0; sum += 7; } > + break; > + case 2: > + if (c == '+') { state = 0; sum += 8; } > + else if (c == '-') { state = 1; sum += 2; } > + break; > + } > + <CODE BLOCK 2> > + } > + return state; > + } > + > + This pass will convert the code inside 'case 0' to something like: > + > + case 0: > + if (c == '+') { state = 1; sum += 9; > + <CODE BLOCK 2> > + s++; if (!s) goto loop_exit; > + <CODE BLOCK 1> > + goto case_1; } > + else if (c != '-') { state = 2; sum += 3; > + <CODE BLOCK 2> > + s++; if (!s) goto loop_exit; > + <CODE BLOCK 1> > + goto case_2; } > + else { <CODE BLOCK 2> > + s++; if (!s) goto exit; > + <CODE BLOCK 1> > + goto case_0; } > + > +Similar transformations would apply to the other parts of the switch > +statement. This obviously can lead to a lot of code duplication but > +it can also result in faster code since we are replacing two jumps > +(one indirect) with a single direct jump. */ > + > +#include "config.h" > +#include "system.h" > +#include "coretypes.h" > +#include "tm.h" > +#include "params.h" > +#include "flags.h" > +#include "tree.h" > +#include "tree-pass.h" > +#include "basic-block.h" > +#include "function.h" > +#include "hash-table.h" > +#include "tree-ssa-alias.h" > +#include "tree-cfg.h" > +#include "tree-ssa-operands.h" > +#include "tree-inline.h" > +#include "gimple-expr.h" > +#include "is-a.h" > +#include "gimple.h" > +#include "tree-phinodes.h" > +#include "gimple-iterator.h" > +#include "gimple-ssa.h" > +#include "ssa-iterators.h" > +#include "tree-into-ssa.h" > +#include "cfgloop.h" > + > +/* Helper function for find_path, visited_bbs is used to make sure we don't > + fall into an infinite loop. */ > + > +static int > +find_path_1 (basic_block start_bb, basic_block end_bb, > + hash_set<basic_block> *visited_bbs) > +{ > + edge_iterator ei; > + edge e; > + > + if (start_bb == end_bb) return 1; > + > + if (!visited_bbs->add (start_bb)) > + { > + FOR_EACH_EDGE (e, ei, start_bb->succs) > + if (find_path_1 (e->dest, end_bb, visited_bbs)) > + return 1; > + } > + return 0; > +} > + > +/* Return 1 if there is a path from start_bb to end_bb and 0 if there > + is not. There may be multiple paths from start_bb to end_bb. */ > + > +static int > +find_path (basic_block start_bb, basic_block end_bb) > +{ > + edge_iterator ei; > + edge e; > + hash_set<basic_block> visited_bbs; > + int p = 0; > + > + if (start_bb == end_bb) return 1; > + > + if (!visited_bbs.add (start_bb)) > + { > + FOR_EACH_EDGE (e, ei, start_bb->succs) > + if (find_path_1 (e->dest, end_bb, &visited_bbs)) > + { > + p = 1; > + break; > + } > + } > + return p; > +} > + > + > +/* We save the paths we want to copy in bbs_list_array. n_bbs_list is the > + number of paths saved, bbs_list_array[i] is the list of basic blocks in > + one path. Each path starts with the block where a variable is assigned > + a constant value (bbs_list_array[i][0]) and ends with the switch statement > + block (bbs_list_array[i][bbs_list_size[i]-2]) followed by the block that > + the switch statement is going to go to given the constant value of the > + variable (bbs_list_array[i][bbs_list_size[i]-1]). */ > + > +struct path_info > +{ > + basic_block **bbs_list_array; > + int *val_array; > + int *bbs_list_size; > + int max_path_count; > + int max_insn_count; > + int n_bbs_list; > +}; > + > +/* bbs_list[0] is the block with the switch statement, > + bbs_list[n-1] is the block where the switch statement variable is assigned > + a constant value, > + The entries in between make a (reverse) path between the two. > + > + We don't want to change bb_list, we want to leave that alone and > + and copy the path to bbs_list_array so that we wind up with a list (array) > + of paths that we want to update. We also want to add the block that the > + switch is going to go to on to the list so that we know which exit from > + the switch statement is important. */ > + > +static void > +save_new_path (basic_block *bbs_list, int n, tree val, path_info *pi) > +{ > + int i; > + int insn_count; > + basic_block bb; > + edge switch_taken_edge; > + gimple_stmt_iterator gsi; > + > + if (n <= 1) return; > + > + if (pi->n_bbs_list >= pi->max_path_count) > + return; > + > + /* Put the blocks in 'correct' order and add in where we want to go after > + the switch statement, We want to leave bbs_list untouched for future > + calls. */ > + > + pi->bbs_list_array[pi->n_bbs_list] = XNEWVEC (basic_block, n+1); > + for (i = 0; i < n; i++) > + pi->bbs_list_array[pi->n_bbs_list][i] = bbs_list[n-i-1]; > + > + switch_taken_edge = find_taken_edge (bbs_list[0], val); > + pi->bbs_list_array[pi->n_bbs_list][n] = switch_taken_edge->dest; > + > + pi->bbs_list_size[pi->n_bbs_list] = n + 1; > + pi->val_array[pi->n_bbs_list] = (int) TREE_INT_CST_LOW (val); > + > + /* Count how many instructions are in the blocks we are going to > + duplicate and if there are too many do not save this path > + (return without incrementing n_bbs_list). */ > + > + insn_count = 0; > + for (i = 1; i < n; i++) > + { > + bb = pi->bbs_list_array[pi->n_bbs_list][i]; > + for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) > + insn_count += estimate_num_insns (gsi_stmt (gsi), &eni_size_weights); > + } > + > + if (insn_count > pi->max_insn_count) > + return; > + > + pi->n_bbs_list = pi->n_bbs_list + 1; > +} > + > +/* switch_stmt is a switch statement whose switch index expression > + is the variable expr. We trace the value of the variable back > + through any phi nodes looking for places where it gets a constant > + value and save the path in bbs_list. Then we call save_new_path > + to create a list of such paths. */ > + > +static void > +process_switch (tree expr, gimple switch_stmt, > + hash_set<gimple> *visited_phis, > + basic_block *bbs_list, int n, > + path_info *pi) > +{ > + gimple def_stmt; > + tree var; > + unsigned int i; > + edge e; > + edge_iterator ei; > + basic_block bbx; > + basic_block var_bb; > + int e_count; > + > + gcc_assert (gimple_code (switch_stmt) == GIMPLE_SWITCH); > + var = SSA_NAME_VAR (expr); > + def_stmt = SSA_NAME_DEF_STMT (expr); > + var_bb = gimple_bb (def_stmt); > + > + if (var == NULL || var_bb == NULL) return; > + > + /* We have a variable definition (var) that is defined in var_bb, > + We want to put the path from var_bb to the current bb into the > + bbs_list. If there is more then one path, skip this and don't > + try to do the optimization. */ > + > + bbx = bbs_list[n-1]; > + while (bbx != var_bb) > + { > + e_count = 0; > + FOR_EACH_EDGE (e, ei, bbx->preds) > + if (find_path (var_bb, e->src)) > + { > + bbs_list[n] = e->src; > + n = n + 1; > + e_count = e_count + 1; > + } > + if (e_count != 1) return; > + bbx = bbs_list[n-1]; > + } > + > + if (gimple_code (def_stmt) == GIMPLE_PHI > + && !visited_phis->add (def_stmt)) > + { > + for (i = 0; i < gimple_phi_num_args (def_stmt); i++) > + { > + tree arg = gimple_phi_arg_def (def_stmt, i); > + if (arg && TREE_CODE (arg) == INTEGER_CST) > + { > + /* const char *name = IDENTIFIER_POINTER (DECL_NAME (var)); */ > + bbs_list[n] = gimple_phi_arg_edge (def_stmt, i)->src; > + save_new_path (bbs_list, n + 1, arg, pi); > + } > + else if (arg && TREE_CODE (arg) == SSA_NAME) > + { > + bbs_list[n] = gimple_phi_arg_edge (def_stmt, i)->src; > + process_switch (arg, switch_stmt, visited_phis, bbs_list, n+1, > pi); > + } > + } > + } > +} > + > +/* Find paths that lead from blocks where a variable is assigned a constant > + value to a switch statement where that variable is used as the switch > + index. Save the paths in bbs_list_array so that they can be processed > + by copy_switch_paths. */ > + > +static unsigned int > +find_switch_shortcuts (function *fun, path_info *pi) > +{ > + basic_block bb; > + hash_set<gimple> visited_phis; > + basic_block *bbs_list; > + int n = 1; > + > + bbs_list = XNEWVEC (basic_block, n_basic_blocks_for_fn (fun)); > + FOR_EACH_BB_FN (bb, fun) > + { > + gimple stmt = last_stmt (bb); > + if (stmt && gimple_code (stmt) == GIMPLE_SWITCH) > + { > + tree op = gimple_switch_index (stmt); > + tree var = SSA_NAME_VAR (op); > + if (var) > + { > + bbs_list[0] = bb; > + process_switch (op, stmt, &visited_phis, bbs_list, n, pi); > + } > + } > + } > + XDELETEVEC (bbs_list); > + return 0; > +} > + > +/* Call gimple_duplicate_sese_region to douplicate the blocks in bb_list. > + We free and recalculate all ssa and dominance information afterwords > + because the region being copied is not really SESE and so we cannot > + trust gimple_duplicate_sese_region to correctly update the dataflow > + information. */ > + > +static void > +duplicate_blocks (basic_block *bb_list, int bb_count) > +{ > + edge orig_edge, exit_edge; > + loop_p loop; > + > + orig_edge = find_edge (bb_list[0], bb_list[1]); > + exit_edge = find_edge (bb_list[bb_count-2], bb_list[bb_count-1]); > + /* Earlier block duplications may have removed the path that we > + saved earlier and are trying to duplicate here. */ > + if (orig_edge != NULL && exit_edge != NULL) > + { > + gimple_duplicate_sese_region (orig_edge, exit_edge, &bb_list[1], > + bb_count-2, NULL, false); > + free_dominance_info (CDI_DOMINATORS); > + update_ssa (TODO_update_ssa); > + calculate_dominance_info (CDI_DOMINATORS); > + loops_state_set (LOOPS_NEED_FIXUP); > + } > +} > + > +/* Go through the paths saved in bbs_list_array and make copies of them. */ > + > +static void > +copy_switch_paths (path_info *pi) > +{ > + int i; > + > + /* Process each path in bbs_list_size. */ > + for (i = 0; i < pi->n_bbs_list; i++) > + { > + /* For each path in bbs_list_size loop through and copy each block in > + the path (except the first on where the constant is assigned and > + the final one where the switch statement goes to. */ > + > + if (!single_pred_p (pi->bbs_list_array[i][1])) > + duplicate_blocks (pi->bbs_list_array[i], pi->bbs_list_size[i]); > + } > +} > + > + > +/* Main entry for the tree if-conversion pass. */ > + > +namespace { > + > +const pass_data pass_data_tree_switch_shortcut = > +{ > + GIMPLE_PASS, /* type */ > + "switch_shortcut", /* name */ > + OPTGROUP_NONE, /* optinfo_flags */ > + TV_TREE_SWITCH_SHORTCUT, /* tv_id */ > + ( PROP_cfg | PROP_ssa ), /* properties_required */ > + 0, /* properties_provided */ > + 0, /* properties_destroyed */ > + 0, /* todo_flags_start */ > + TODO_update_ssa, /* todo_flags_finish */ > +}; > + > +class pass_tree_switch_shortcut : public gimple_opt_pass > +{ > +public: > + pass_tree_switch_shortcut (gcc::context *ctxt) > + : gimple_opt_pass (pass_data_tree_switch_shortcut, ctxt) > + {} > + > + /* opt_pass methods: */ > + virtual bool gate (function *) > + { > + return flag_tree_switch_shortcut; > + } > + virtual unsigned int execute (function *); > + > +}; // class pass_tree_switch_shortcut > + > +unsigned int > +pass_tree_switch_shortcut::execute (function *fun) > +{ > + int i; > + path_info *pi; > + > + pi = XNEW (path_info); > + pi->n_bbs_list = 0; > + pi->max_insn_count = PARAM_VALUE (PARAM_MAX_SWITCH_INSNS); > + pi->max_path_count = PARAM_VALUE (PARAM_MAX_SWITCH_PATHS); > + pi->val_array = XNEWVEC (int, pi->max_path_count); > + pi->bbs_list_size = XNEWVEC (int, pi->max_path_count); > + pi->bbs_list_array = XNEWVEC (basic_block *, pi->max_path_count); > + find_switch_shortcuts (fun, pi); > + copy_switch_paths (pi); > + XDELETEVEC (pi->val_array); > + XDELETEVEC (pi->bbs_list_size); > + for (i = 0; i < pi->n_bbs_list; i++) > + XDELETEVEC (pi->bbs_list_array[i]); > + XDELETEVEC (pi->bbs_list_array); > + XDELETE (pi); > + return 0; > +} > + > +} // anon namespace > + > +gimple_opt_pass * > +make_pass_tree_switch_shortcut (gcc::context *ctxt) > +{ > + return new pass_tree_switch_shortcut (ctxt); > +}