On Tue, Sep 11, 2012 at 1:07 PM, Marc Glisse <marc.gli...@inria.fr> wrote: > Hello, > > here is a patch that turns {v[1],v[0]} into vec_perm_expr(v,v,{1,0}) if the > target is ok with it. > > I am attaching 2 versions of the patch. p-good is the one that passes > testing. p-bad, where I rely on fold_stmt to detect identity permutations, > ICEs towards the end of the pass while checking a bogus gimple stmt (one > that gimple_debug_stmt crashes on if I call it in gdb). From a performance > point of view, p-good makes sense, but I liked the simplicity of p-bad and I > am confused as to why it fails.
Probably because you cannot simply increase num_ops ... > 2012-09-11 Marc Glisse <marc.gli...@inria.fr> > > gcc/ > * tree-ssa-forwprop.c (simplify_vector_constructor): New function. > (ssa_forward_propagate_and_combine): Call it. > > gcc/testsuite/ > * gcc.dg/tree-ssa/forwprop-22.c: New testcase. > > -- > Marc Glisse > Index: Makefile.in > =================================================================== > --- Makefile.in (revision 191173) > +++ Makefile.in (working copy) > @@ -2237,21 +2237,22 @@ tree-outof-ssa.o : tree-outof-ssa.c $(TR > $(TREE_H) $(DIAGNOSTIC_H) $(TM_H) coretypes.h dumpfile.h \ > $(TREE_SSA_LIVE_H) $(BASIC_BLOCK_H) $(BITMAP_H) $(GGC_H) \ > $(EXPR_H) $(SSAEXPAND_H) $(GIMPLE_PRETTY_PRINT_H) > tree-ssa-dse.o : tree-ssa-dse.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \ > $(TM_H) $(GGC_H) $(TREE_H) $(TM_P_H) $(BASIC_BLOCK_H) \ > $(TREE_FLOW_H) $(TREE_PASS_H) domwalk.h $(FLAGS_H) \ > $(GIMPLE_PRETTY_PRINT_H) langhooks.h > tree-ssa-forwprop.o : tree-ssa-forwprop.c $(CONFIG_H) $(SYSTEM_H) > coretypes.h \ > $(TM_H) $(TREE_H) $(TM_P_H) $(BASIC_BLOCK_H) $(CFGLOOP_H) \ > $(TREE_FLOW_H) $(TREE_PASS_H) $(DIAGNOSTIC_H) \ > - langhooks.h $(FLAGS_H) $(GIMPLE_H) $(GIMPLE_PRETTY_PRINT_H) $(EXPR_H) > + langhooks.h $(FLAGS_H) $(GIMPLE_H) $(GIMPLE_PRETTY_PRINT_H) $(EXPR_H) \ > + $(TREE_VECTORIZER_H) > tree-ssa-phiprop.o : tree-ssa-phiprop.c $(CONFIG_H) $(SYSTEM_H) coretypes.h > \ > $(TM_H) $(TREE_H) $(TM_P_H) $(BASIC_BLOCK_H) \ > $(TREE_FLOW_H) $(TREE_PASS_H) $(DIAGNOSTIC_H) \ > langhooks.h $(FLAGS_H) $(GIMPLE_PRETTY_PRINT_H) > tree-ssa-ifcombine.o : tree-ssa-ifcombine.c $(CONFIG_H) $(SYSTEM_H) \ > coretypes.h $(TM_H) $(TREE_H) $(BASIC_BLOCK_H) \ > $(TREE_FLOW_H) $(TREE_PASS_H) $(DIAGNOSTIC_H) \ > $(TREE_PRETTY_PRINT_H) > tree-ssa-phiopt.o : tree-ssa-phiopt.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \ > $(TM_H) $(GGC_H) $(TREE_H) $(TM_P_H) $(BASIC_BLOCK_H) \ > Index: testsuite/gcc.dg/tree-ssa/forwprop-22.c > =================================================================== > --- testsuite/gcc.dg/tree-ssa/forwprop-22.c (revision 0) > +++ testsuite/gcc.dg/tree-ssa/forwprop-22.c (revision 0) > @@ -0,0 +1,18 @@ > +/* { dg-do compile } */ > +/* { dg-require-effective-target vect_double } */ > +/* { dg-require-effective-target vect_perm } */ > +/* { dg-options "-O -fdump-tree-optimized" } */ > + > +typedef double vec __attribute__((vector_size (2 * sizeof (double)))); > +void f (vec *px, vec *y, vec *z) > +{ > + vec x = *px; > + vec t1 = { x[1], x[0] }; > + vec t2 = { x[0], x[1] }; > + *y = t1; > + *z = t2; > +} > + > +/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 1 "optimized" } } */ > +/* { dg-final { scan-tree-dump-not "BIT_FIELD_REF" "optimized" } } */ > +/* { dg-final { cleanup-tree-dump "optimized" } } */ > > Property changes on: testsuite/gcc.dg/tree-ssa/forwprop-22.c > ___________________________________________________________________ > Added: svn:keywords > + Author Date Id Revision URL > Added: svn:eol-style > + native > > Index: tree-ssa-forwprop.c > =================================================================== > --- tree-ssa-forwprop.c (revision 191173) > +++ tree-ssa-forwprop.c (working copy) > @@ -26,20 +26,21 @@ along with GCC; see the file COPYING3. > #include "tm_p.h" > #include "basic-block.h" > #include "gimple-pretty-print.h" > #include "tree-flow.h" > #include "tree-pass.h" > #include "langhooks.h" > #include "flags.h" > #include "gimple.h" > #include "expr.h" > #include "cfgloop.h" > +#include "tree-vectorizer.h" > > /* This pass propagates the RHS of assignment statements into use > sites of the LHS of the assignment. It's basically a specialized > form of tree combination. It is hoped all of this can disappear > when we have a generalized tree combiner. > > One class of common cases we handle is forward propagating a single use > variable into a COND_EXPR. > > bb0: > @@ -2787,20 +2788,105 @@ simplify_permutation (gimple_stmt_iterat > if (TREE_CODE (op0) == SSA_NAME) > ret = remove_prop_source_from_use (op0); > if (op0 != op1 && TREE_CODE (op1) == SSA_NAME) > ret |= remove_prop_source_from_use (op1); > return ret ? 2 : 1; > } > > return 0; > } > > +/* Recognize a VEC_PERM_EXPR. Returns true if there were any changes. */ > + > +static bool > +simplify_vector_constructor (gimple_stmt_iterator *gsi) > +{ > + gimple stmt = gsi_stmt (*gsi); > + gimple def_stmt; > + tree op, op2, orig, type, elem_type; > + unsigned elem_size, nelts, i; > + enum tree_code code; > + constructor_elt *elt; > + unsigned char *sel; > + bool maybe_ident; > + > + gcc_checking_assert (gimple_assign_rhs_code (stmt) == CONSTRUCTOR); > + > + op = gimple_assign_rhs1 (stmt); > + type = TREE_TYPE (op); > + gcc_checking_assert (TREE_CODE (type) == VECTOR_TYPE); > + > + nelts = TYPE_VECTOR_SUBPARTS (type); > + elem_type = TREE_TYPE (type); > + elem_size = TREE_INT_CST_LOW (TYPE_SIZE (elem_type)); > + > + sel = XALLOCAVEC (unsigned char, nelts); > + orig = NULL; > + maybe_ident = true; > + FOR_EACH_VEC_ELT (constructor_elt, CONSTRUCTOR_ELTS (op), i, elt) > + { > + tree ref, op1; > + > + if (i >= nelts) > + return false; > + > + if (TREE_CODE (elt->value) != SSA_NAME) > + return false; > + def_stmt = SSA_NAME_DEF_STMT (elt->value); > + if (!def_stmt || !is_gimple_assign (def_stmt)) > + return false; > + code = gimple_assign_rhs_code (def_stmt); > + if (code != BIT_FIELD_REF) > + return false; > + op1 = gimple_assign_rhs1 (def_stmt); > + ref = TREE_OPERAND (op1, 0); > + if (orig) > + { > + if (ref != orig) > + return false; > + } > + else > + { > + if (TREE_CODE (ref) != SSA_NAME) > + return false; > + orig = ref; > + } > + if (TREE_INT_CST_LOW (TREE_OPERAND (op1, 1)) != elem_size) > + return false; > + sel[i] = TREE_INT_CST_LOW (TREE_OPERAND (op1, 2)) / elem_size; > + if (sel[i] != i) maybe_ident = false; > + } > + if (i < nelts) > + return false; > + > + if (maybe_ident) > + { > + gimple_assign_set_rhs1 (stmt, unshare_expr (orig)); > + gimple_set_num_ops (stmt, 2); > + gimple_assign_set_rhs_code (stmt, TREE_CODE (orig)); > + update_stmt (stmt); You should either use gimple_assign_set_rhs_from_tree (like here - no need to unshare_expr SSA names) > + return true; > + } > + > + op2 = vect_gen_perm_mask (type, sel); > + if (!op2) > + return false; > + orig = unshare_expr (orig); Likewise. > + gimple_assign_set_rhs_code (stmt, VEC_PERM_EXPR); > + gimple_set_num_ops (stmt, 4); > + gimple_assign_set_rhs1 (stmt, orig); > + gimple_assign_set_rhs2 (stmt, orig); > + gimple_assign_set_rhs3 (stmt, op2); ... or gimple_assign_set_rhs_with_ops, like here. That will re-allocate the stmt if necessary. Ok with that change. Thanks, Richard. > + update_stmt (stmt); > + return true; > +} > + > /* Main entry point for the forward propagation and statement combine > optimizer. */ > > static unsigned int > ssa_forward_propagate_and_combine (void) > { > basic_block bb; > unsigned int todoflags = 0; > > cfg_changed = false; > @@ -2958,20 +3044,23 @@ ssa_forward_propagate_and_combine (void) > } > else if (code == VEC_PERM_EXPR) > { > int did_something = simplify_permutation (&gsi); > if (did_something == 2) > cfg_changed = true; > changed = did_something != 0; > } > else if (code == BIT_FIELD_REF) > changed = simplify_bitfield_ref (&gsi); > + else if (code == CONSTRUCTOR > + && TREE_CODE (TREE_TYPE (rhs1)) == VECTOR_TYPE) > + changed = simplify_vector_constructor (&gsi); > break; > } > > case GIMPLE_SWITCH: > changed = simplify_gimple_switch (stmt); > break; > > case GIMPLE_COND: > { > int did_something; > > Index: Makefile.in > =================================================================== > --- Makefile.in (revision 191173) > +++ Makefile.in (working copy) > @@ -2237,21 +2237,22 @@ tree-outof-ssa.o : tree-outof-ssa.c $(TR > $(TREE_H) $(DIAGNOSTIC_H) $(TM_H) coretypes.h dumpfile.h \ > $(TREE_SSA_LIVE_H) $(BASIC_BLOCK_H) $(BITMAP_H) $(GGC_H) \ > $(EXPR_H) $(SSAEXPAND_H) $(GIMPLE_PRETTY_PRINT_H) > tree-ssa-dse.o : tree-ssa-dse.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \ > $(TM_H) $(GGC_H) $(TREE_H) $(TM_P_H) $(BASIC_BLOCK_H) \ > $(TREE_FLOW_H) $(TREE_PASS_H) domwalk.h $(FLAGS_H) \ > $(GIMPLE_PRETTY_PRINT_H) langhooks.h > tree-ssa-forwprop.o : tree-ssa-forwprop.c $(CONFIG_H) $(SYSTEM_H) > coretypes.h \ > $(TM_H) $(TREE_H) $(TM_P_H) $(BASIC_BLOCK_H) $(CFGLOOP_H) \ > $(TREE_FLOW_H) $(TREE_PASS_H) $(DIAGNOSTIC_H) \ > - langhooks.h $(FLAGS_H) $(GIMPLE_H) $(GIMPLE_PRETTY_PRINT_H) $(EXPR_H) > + langhooks.h $(FLAGS_H) $(GIMPLE_H) $(GIMPLE_PRETTY_PRINT_H) $(EXPR_H) \ > + $(TREE_VECTORIZER_H) > tree-ssa-phiprop.o : tree-ssa-phiprop.c $(CONFIG_H) $(SYSTEM_H) coretypes.h > \ > $(TM_H) $(TREE_H) $(TM_P_H) $(BASIC_BLOCK_H) \ > $(TREE_FLOW_H) $(TREE_PASS_H) $(DIAGNOSTIC_H) \ > langhooks.h $(FLAGS_H) $(GIMPLE_PRETTY_PRINT_H) > tree-ssa-ifcombine.o : tree-ssa-ifcombine.c $(CONFIG_H) $(SYSTEM_H) \ > coretypes.h $(TM_H) $(TREE_H) $(BASIC_BLOCK_H) \ > $(TREE_FLOW_H) $(TREE_PASS_H) $(DIAGNOSTIC_H) \ > $(TREE_PRETTY_PRINT_H) > tree-ssa-phiopt.o : tree-ssa-phiopt.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \ > $(TM_H) $(GGC_H) $(TREE_H) $(TM_P_H) $(BASIC_BLOCK_H) \ > Index: testsuite/gcc.dg/tree-ssa/forwprop-22.c > =================================================================== > --- testsuite/gcc.dg/tree-ssa/forwprop-22.c (revision 0) > +++ testsuite/gcc.dg/tree-ssa/forwprop-22.c (revision 0) > @@ -0,0 +1,18 @@ > +/* { dg-do compile } */ > +/* { dg-require-effective-target vect_double } */ > +/* { dg-require-effective-target vect_perm } */ > +/* { dg-options "-O -fdump-tree-optimized" } */ > + > +typedef double vec __attribute__((vector_size (2 * sizeof (double)))); > +void f (vec *px, vec *y, vec *z) > +{ > + vec x = *px; > + vec t1 = { x[1], x[0] }; > + vec t2 = { x[0], x[1] }; > + *y = t1; > + *z = t2; > +} > + > +/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 1 "optimized" } } */ > +/* { dg-final { scan-tree-dump-not "BIT_FIELD_REF" "optimized" } } */ > +/* { dg-final { cleanup-tree-dump "optimized" } } */ > > Property changes on: testsuite/gcc.dg/tree-ssa/forwprop-22.c > ___________________________________________________________________ > Added: svn:eol-style > + native > Added: svn:keywords > + Author Date Id Revision URL > > Index: tree-ssa-forwprop.c > =================================================================== > --- tree-ssa-forwprop.c (revision 191173) > +++ tree-ssa-forwprop.c (working copy) > @@ -26,20 +26,21 @@ along with GCC; see the file COPYING3. > #include "tm_p.h" > #include "basic-block.h" > #include "gimple-pretty-print.h" > #include "tree-flow.h" > #include "tree-pass.h" > #include "langhooks.h" > #include "flags.h" > #include "gimple.h" > #include "expr.h" > #include "cfgloop.h" > +#include "tree-vectorizer.h" > > /* This pass propagates the RHS of assignment statements into use > sites of the LHS of the assignment. It's basically a specialized > form of tree combination. It is hoped all of this can disappear > when we have a generalized tree combiner. > > One class of common cases we handle is forward propagating a single use > variable into a COND_EXPR. > > bb0: > @@ -2787,20 +2788,94 @@ simplify_permutation (gimple_stmt_iterat > if (TREE_CODE (op0) == SSA_NAME) > ret = remove_prop_source_from_use (op0); > if (op0 != op1 && TREE_CODE (op1) == SSA_NAME) > ret |= remove_prop_source_from_use (op1); > return ret ? 2 : 1; > } > > return 0; > } > > +/* Recognize a VEC_PERM_EXPR. Returns true if there were any changes. */ > + > +static bool > +simplify_vector_constructor (gimple_stmt_iterator *gsi) > +{ > + gimple stmt = gsi_stmt (*gsi); > + gimple def_stmt; > + tree op, op2, orig, type, elem_type; > + unsigned elem_size, nelts, i; > + enum tree_code code; > + constructor_elt *elt; > + unsigned char *sel; > + > + gcc_checking_assert (gimple_assign_rhs_code (stmt) == CONSTRUCTOR); > + > + op = gimple_assign_rhs1 (stmt); > + type = TREE_TYPE (op); > + gcc_checking_assert (TREE_CODE (type) == VECTOR_TYPE); > + > + nelts = TYPE_VECTOR_SUBPARTS (type); > + elem_type = TREE_TYPE (type); > + elem_size = TREE_INT_CST_LOW (TYPE_SIZE (elem_type)); > + > + sel = XALLOCAVEC (unsigned char, nelts); > + orig = NULL; > + FOR_EACH_VEC_ELT (constructor_elt, CONSTRUCTOR_ELTS (op), i, elt) > + { > + tree ref, op1; > + > + if (i >= nelts) > + return false; > + > + if (TREE_CODE (elt->value) != SSA_NAME) > + return false; > + def_stmt = SSA_NAME_DEF_STMT (elt->value); > + if (!def_stmt || !is_gimple_assign (def_stmt)) > + return false; > + code = gimple_assign_rhs_code (def_stmt); > + if (code != BIT_FIELD_REF) > + return false; > + op1 = gimple_assign_rhs1 (def_stmt); > + ref = TREE_OPERAND (op1, 0); > + if (orig) > + { > + if (ref != orig) > + return false; > + } > + else > + { > + if (TREE_CODE (ref) != SSA_NAME) > + return false; > + orig = ref; > + } > + if (TREE_INT_CST_LOW (TREE_OPERAND (op1, 1)) != elem_size) > + return false; > + sel[i] = TREE_INT_CST_LOW (TREE_OPERAND (op1, 2)) / elem_size; > + } > + if (i < nelts) > + return false; > + > + op2 = vect_gen_perm_mask (type, sel); > + if (!op2) > + return false; > + orig = unshare_expr (orig); > + gimple_assign_set_rhs_code (stmt, VEC_PERM_EXPR); > + gimple_set_num_ops (stmt, 4); > + gimple_assign_set_rhs1 (stmt, orig); > + gimple_assign_set_rhs2 (stmt, orig); > + gimple_assign_set_rhs3 (stmt, op2); > + fold_stmt (gsi); > + update_stmt (gsi_stmt (*gsi)); > + return true; > +} > + > /* Main entry point for the forward propagation and statement combine > optimizer. */ > > static unsigned int > ssa_forward_propagate_and_combine (void) > { > basic_block bb; > unsigned int todoflags = 0; > > cfg_changed = false; > @@ -2958,20 +3033,23 @@ ssa_forward_propagate_and_combine (void) > } > else if (code == VEC_PERM_EXPR) > { > int did_something = simplify_permutation (&gsi); > if (did_something == 2) > cfg_changed = true; > changed = did_something != 0; > } > else if (code == BIT_FIELD_REF) > changed = simplify_bitfield_ref (&gsi); > + else if (code == CONSTRUCTOR > + && TREE_CODE (TREE_TYPE (rhs1)) == VECTOR_TYPE) > + changed = simplify_vector_constructor (&gsi); > break; > } > > case GIMPLE_SWITCH: > changed = simplify_gimple_switch (stmt); > break; > > case GIMPLE_COND: > { > int did_something; >