On Fri, 17 May 2013, Jakub Jelinek wrote:

> On Wed, May 15, 2013 at 03:24:37PM +0200, Richard Biener wrote:
> > We have the same issue in some other places where we insert invariant
> > code into the loop body - one reason there is another LIM pass
> > after vectorization.
>
> Well, in this case it causes the shift amount to be loaded into a vector
> instead of a scalar, so even when LIM moves it before the loop, it
> will only work with vector/vector shifts and be more expensive that way
> (the value needs to be broadcast into a vector).  The following patch
> improves this slightly, at least for loops, by emitting the shift-amount
> stmts on the loop preheader edge.  rotate-4.c used to be vectorizable
> only with -mavx2 (which has vector/vector shifts); now -mavx (which
> doesn't) vectorizes it as well.  Unfortunately this trick doesn't work
> for SLP vectorization: emitting the stmts at the start of the current bb
> doesn't help, because every stmt emits its own copy, so it is still
> vectorized with vector/vector shifts anyway.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
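For reference, the kind of loop being discussed looks roughly like the
sketch below; rotate-4.c itself is not quoted in this thread, so the
array name, bounds, and exact rotate idiom here are illustrative only:

  unsigned int a[1024];

  /* Rotate each element left by a loop-invariant, non-constant amount.
     Writing the right shift as (-n & 31) keeps the expression well
     defined for n == 0 (assuming 0 <= n < 32) while still matching the
     rotate idiom.  Lowering the rotate emits negation and masking stmts
     for the shift amount; the patch moves those to the loop preheader
     edge, so a vector/scalar shift suffices and -mavx2's vector/vector
     shifts are no longer required.  */
  void
  foo (unsigned int n)
  {
    int i;
    for (i = 0; i < 1024; i++)
      a[i] = (a[i] << n) | (a[i] >> (-n & 31));
  }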
Ok.

Thanks,
Richard.

> 2013-05-17  Jakub Jelinek  <ja...@redhat.com>
>
> 	* tree-vect-patterns.c (vect_recog_rotate_pattern): For
> 	vect_external_def oprnd1 with loop_vinfo, try to emit
> 	optional cast, negation and and stmts on the loop preheader
> 	edge instead of into the pattern def seq.
>
> 	* gcc.target/i386/rotate-4.c: Compile only with -mavx
> 	instead of -mavx2, require only avx instead of avx2.
> 	* gcc.target/i386/rotate-4a.c: Include avx-check.h instead
> 	of avx2-check.h and turn into an avx runtime test instead of
> 	avx2 runtime test.
>
> --- gcc/tree-vect-patterns.c.jj	2013-05-16 13:56:08.000000000 +0200
> +++ gcc/tree-vect-patterns.c	2013-05-16 15:27:00.565143478 +0200
> @@ -1494,6 +1494,7 @@ vect_recog_rotate_pattern (vec<gimple> *
>    bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
>    enum vect_def_type dt;
>    optab optab1, optab2;
> +  edge ext_def = NULL;
>
>    if (!is_gimple_assign (last_stmt))
>      return NULL;
> @@ -1574,6 +1575,21 @@ vect_recog_rotate_pattern (vec<gimple> *
>    if (*type_in == NULL_TREE)
>      return NULL;
>
> +  if (dt == vect_external_def
> +      && TREE_CODE (oprnd1) == SSA_NAME
> +      && loop_vinfo)
> +    {
> +      struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
> +      ext_def = loop_preheader_edge (loop);
> +      if (!SSA_NAME_IS_DEFAULT_DEF (oprnd1))
> +	{
> +	  basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (oprnd1));
> +	  if (bb == NULL
> +	      || !dominated_by_p (CDI_DOMINATORS, ext_def->dest, bb))
> +	    ext_def = NULL;
> +	}
> +    }
> +
>    def = NULL_TREE;
>    if (TREE_CODE (oprnd1) == INTEGER_CST
>        || TYPE_MODE (TREE_TYPE (oprnd1)) == TYPE_MODE (type))
> @@ -1593,7 +1609,14 @@ vect_recog_rotate_pattern (vec<gimple> *
>        def = vect_recog_temp_ssa_var (type, NULL);
>        def_stmt = gimple_build_assign_with_ops (NOP_EXPR, def, oprnd1,
> 					       NULL_TREE);
> -      append_pattern_def_seq (stmt_vinfo, def_stmt);
> +      if (ext_def)
> +	{
> +	  basic_block new_bb
> +	    = gsi_insert_on_edge_immediate (ext_def, def_stmt);
> +	  gcc_assert (!new_bb);
> +	}
> +      else
> +	append_pattern_def_seq (stmt_vinfo, def_stmt);
>      }
>    stype = TREE_TYPE (def);
>
> @@ -1618,11 +1641,19 @@ vect_recog_rotate_pattern (vec<gimple> *
>        def2 = vect_recog_temp_ssa_var (stype, NULL);
>        def_stmt = gimple_build_assign_with_ops (NEGATE_EXPR, def2, def,
> 					       NULL_TREE);
> -      def_stmt_vinfo
> -	= new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
> -      set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
> -      STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
> -      append_pattern_def_seq (stmt_vinfo, def_stmt);
> +      if (ext_def)
> +	{
> +	  basic_block new_bb
> +	    = gsi_insert_on_edge_immediate (ext_def, def_stmt);
> +	  gcc_assert (!new_bb);
> +	}
> +      else
> +	{
> +	  def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
> +	  set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
> +	  STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
> +	  append_pattern_def_seq (stmt_vinfo, def_stmt);
> +	}
>
>        def2 = vect_recog_temp_ssa_var (stype, NULL);
>        tree mask
> @@ -1630,11 +1661,19 @@ vect_recog_rotate_pattern (vec<gimple> *
>        def_stmt = gimple_build_assign_with_ops (BIT_AND_EXPR, def2,
> 					       gimple_assign_lhs (def_stmt),
> 					       mask);
> -      def_stmt_vinfo
> -	= new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
> -      set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
> -      STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
> -      append_pattern_def_seq (stmt_vinfo, def_stmt);
> +      if (ext_def)
> +	{
> +	  basic_block new_bb
> +	    = gsi_insert_on_edge_immediate (ext_def, def_stmt);
> +	  gcc_assert (!new_bb);
> +	}
> +      else
> +	{
> +	  def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
> +	  set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
> +	  STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
> +	  append_pattern_def_seq (stmt_vinfo, def_stmt);
> +	}
>      }
>
>    var1 = vect_recog_temp_ssa_var (type, NULL);
> --- gcc/testsuite/gcc.target/i386/rotate-4.c.jj	2013-05-16 13:50:14.000000000 +0200
> +++ gcc/testsuite/gcc.target/i386/rotate-4.c	2013-05-16 15:23:32.729313026 +0200
> @@ -1,6 +1,6 @@
>  /* { dg-do compile } */
> -/* { dg-require-effective-target avx2 } */
> -/* { dg-options "-O3 -mavx2 -fdump-tree-vect-details" } */
> +/* { dg-require-effective-target avx } */
> +/* { dg-options "-O3 -mavx -fdump-tree-vect-details" } */
>  /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
>  /* { dg-final { cleanup-tree-dump "vect" } } */
>
> --- gcc/testsuite/gcc.target/i386/rotate-4a.c.jj	2013-05-16 14:00:33.000000000 +0200
> +++ gcc/testsuite/gcc.target/i386/rotate-4a.c	2013-05-16 15:23:44.791247428 +0200
> @@ -1,14 +1,14 @@
>  /* { dg-do run } */
> -/* { dg-require-effective-target avx2 } */
> -/* { dg-options "-O3 -mavx2" } */
> +/* { dg-require-effective-target avx } */
> +/* { dg-options "-O3 -mavx" } */
>
> -#include "avx2-check.h"
> +#include "avx-check.h"
>
>  #include "rotate-4.c"
>
>  static void
>  __attribute__((noinline))
> -avx2_test (void)
> +avx_test (void)
>  {
>    int i;
>    for (i = 0; i < 1024; i++)
>
> 	Jakub

-- 
Richard Biener <rguent...@suse.de>
SUSE / SUSE Labs
SUSE LINUX Products GmbH - Nuernberg - AG Nuernberg - HRB 16746
GF: Jeff Hawn, Jennifer Guild, Felix Imend