On Fri, 17 May 2013, Jakub Jelinek wrote:

> On Wed, May 15, 2013 at 03:24:37PM +0200, Richard Biener wrote:
> > We have the same issue in some other places where we insert invariant
> > code into the loop body - one reason there is another LIM pass
> > after vectorization.
> 
> Well, in this case it causes the shift amount to be loaded into a vector
> instead of a scalar, therefore even when LIM moves it before the loop, it
> will only work with vector/vector shifts and be more expensive that way
> (need to broadcast the value in a vector).  The following patch
> improves it slightly, at least for loops, by just emitting the shift amount
> stmts on the loop preheader edge.  rotate-4.c used to be vectorizable only
> with -mavx2 (which has vector/vector shifts); now -mavx (which doesn't)
> vectorizes it as well.  Unfortunately this trick doesn't work for SLP
> vectorization: emitting the stmts at the start of the current bb doesn't
> help, because every stmt emits its own set of them and thus it is
> vectorized with vector/vector shifts only anyway.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

Ok.

Thanks,
Richard.
 
> 2013-05-17  Jakub Jelinek  <ja...@redhat.com>
> 
>       * tree-vect-patterns.c (vect_recog_rotate_pattern): For
>       vect_external_def oprnd1 with loop_vinfo, try to emit
>       optional cast, negation and BIT_AND_EXPR stmts on the loop preheader
>       edge instead of into the pattern def seq.
> 
>       * gcc.target/i386/rotate-4.c: Compile only with -mavx
>       instead of -mavx2, require only avx instead of avx2.
>       * gcc.target/i386/rotate-4a.c: Include avx-check.h instead
>       of avx2-check.h and turn into an avx runtime test instead of
>       avx2 runtime test.
> 
> --- gcc/tree-vect-patterns.c.jj       2013-05-16 13:56:08.000000000 +0200
> +++ gcc/tree-vect-patterns.c  2013-05-16 15:27:00.565143478 +0200
> @@ -1494,6 +1494,7 @@ vect_recog_rotate_pattern (vec<gimple> *
>    bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
>    enum vect_def_type dt;
>    optab optab1, optab2;
> +  edge ext_def = NULL;
>  
>    if (!is_gimple_assign (last_stmt))
>      return NULL;
> @@ -1574,6 +1575,21 @@ vect_recog_rotate_pattern (vec<gimple> *
>    if (*type_in == NULL_TREE)
>      return NULL;
>  
> +  if (dt == vect_external_def
> +      && TREE_CODE (oprnd1) == SSA_NAME
> +      && loop_vinfo)
> +    {
> +      struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
> +      ext_def = loop_preheader_edge (loop);
> +      if (!SSA_NAME_IS_DEFAULT_DEF (oprnd1))
> +     {
> +       basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (oprnd1));
> +       if (bb == NULL
> +           || !dominated_by_p (CDI_DOMINATORS, ext_def->dest, bb))
> +         ext_def = NULL;
> +     }
> +    }
> +
>    def = NULL_TREE;
>    if (TREE_CODE (oprnd1) == INTEGER_CST
>        || TYPE_MODE (TREE_TYPE (oprnd1)) == TYPE_MODE (type))
> @@ -1593,7 +1609,14 @@ vect_recog_rotate_pattern (vec<gimple> *
>        def = vect_recog_temp_ssa_var (type, NULL);
>        def_stmt = gimple_build_assign_with_ops (NOP_EXPR, def, oprnd1,
>                                              NULL_TREE);
> -      append_pattern_def_seq (stmt_vinfo, def_stmt);
> +      if (ext_def)
> +     {
> +       basic_block new_bb
> +         = gsi_insert_on_edge_immediate (ext_def, def_stmt);
> +       gcc_assert (!new_bb);
> +     }
> +      else
> +     append_pattern_def_seq (stmt_vinfo, def_stmt);
>      }
>    stype = TREE_TYPE (def);
>  
> @@ -1618,11 +1641,19 @@ vect_recog_rotate_pattern (vec<gimple> *
>        def2 = vect_recog_temp_ssa_var (stype, NULL);
>        def_stmt = gimple_build_assign_with_ops (NEGATE_EXPR, def2, def,
>                                              NULL_TREE);
> -      def_stmt_vinfo
> -     = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
> -      set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
> -      STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
> -      append_pattern_def_seq (stmt_vinfo, def_stmt);
> +      if (ext_def)
> +     {
> +       basic_block new_bb
> +         = gsi_insert_on_edge_immediate (ext_def, def_stmt);
> +       gcc_assert (!new_bb);
> +     }
> +      else
> +     {
> +       def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
> +       set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
> +       STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
> +       append_pattern_def_seq (stmt_vinfo, def_stmt);
> +     }
>  
>        def2 = vect_recog_temp_ssa_var (stype, NULL);
>        tree mask
> @@ -1630,11 +1661,19 @@ vect_recog_rotate_pattern (vec<gimple> *
>        def_stmt = gimple_build_assign_with_ops (BIT_AND_EXPR, def2,
>                                              gimple_assign_lhs (def_stmt),
>                                              mask);
> -      def_stmt_vinfo
> -     = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
> -      set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
> -      STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
> -      append_pattern_def_seq (stmt_vinfo, def_stmt);
> +      if (ext_def)
> +     {
> +       basic_block new_bb
> +         = gsi_insert_on_edge_immediate (ext_def, def_stmt);
> +       gcc_assert (!new_bb);
> +     }
> +      else
> +     {
> +       def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
> +       set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
> +       STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
> +       append_pattern_def_seq (stmt_vinfo, def_stmt);
> +     }
>      }
>  
>    var1 = vect_recog_temp_ssa_var (type, NULL);
> --- gcc/testsuite/gcc.target/i386/rotate-4.c.jj       2013-05-16 13:50:14.000000000 +0200
> +++ gcc/testsuite/gcc.target/i386/rotate-4.c  2013-05-16 15:23:32.729313026 +0200
> @@ -1,6 +1,6 @@
>  /* { dg-do compile } */
> -/* { dg-require-effective-target avx2 } */
> -/* { dg-options "-O3 -mavx2 -fdump-tree-vect-details" } */
> +/* { dg-require-effective-target avx } */
> +/* { dg-options "-O3 -mavx -fdump-tree-vect-details" } */
>  /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
>  /* { dg-final { cleanup-tree-dump "vect" } } */
>  
> --- gcc/testsuite/gcc.target/i386/rotate-4a.c.jj      2013-05-16 14:00:33.000000000 +0200
> +++ gcc/testsuite/gcc.target/i386/rotate-4a.c 2013-05-16 15:23:44.791247428 +0200
> @@ -1,14 +1,14 @@
>  /* { dg-do run } */
> -/* { dg-require-effective-target avx2 } */
> -/* { dg-options "-O3 -mavx2" } */
> +/* { dg-require-effective-target avx } */
> +/* { dg-options "-O3 -mavx" } */
>  
> -#include "avx2-check.h"
> +#include "avx-check.h"
>  
>  #include "rotate-4.c"
>  
>  static void
>  __attribute__((noinline))
> -avx2_test (void)
> +avx_test (void)
>  {
>    int i;
>    for (i = 0; i < 1024; i++)
> 
> 
>       Jakub
> 
> 

-- 
Richard Biener <rguent...@suse.de>
SUSE / SUSE Labs
SUSE LINUX Products GmbH - Nuernberg - AG Nuernberg - HRB 16746
GF: Jeff Hawn, Jennifer Guild, Felix Imend

Reply via email to