On Wed, 19 Oct 2011, Jakub Jelinek wrote:

> Hi!
> 
> Similarly to casts of bool to integer, stores into bool arrays
> can be handled in much the same way.  We just need to ensure that
> tree-vect-data-refs.c doesn't reject vectorization before
> tree-vect-patterns.c has had a chance to optimize it.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

Ok with ...

> 2011-10-19  Jakub Jelinek  <ja...@redhat.com>
> 
>       PR tree-optimization/50596
>       * tree-vect-stmts.c (vect_mark_relevant): Only use
>       FOR_EACH_IMM_USE_FAST if lhs is SSA_NAME.
>       (vectorizable_store): If is_pattern_stmt_p look through
>       VIEW_CONVERT_EXPR on lhs.
>       * tree-vect-patterns.c (vect_recog_bool_pattern): Optimize
>       also stores into bool memory in addition to casts from bool
>       to integral types.
>       (vect_mark_pattern_stmts): If pattern_stmt already has vinfo
>       created, don't create it again.
>       * tree-vect-data-refs.c (vect_analyze_data_refs): For stores
>       into bool memory use vectype for integral type corresponding
>       to bool's mode.
>       * tree-vect-loop.c (vect_determine_vectorization_factor): Give up
>       if a store into bool memory hasn't been replaced by the pattern
>       recognizer.
> 
>       * gcc.dg/vect/vect-cond-10.c: New test.
> 
> --- gcc/tree-vect-stmts.c.jj  2011-10-18 23:52:07.000000000 +0200
> +++ gcc/tree-vect-stmts.c     2011-10-19 14:19:00.000000000 +0200
> @@ -159,19 +159,20 @@ vect_mark_relevant (VEC(gimple,heap) **w
>            /* This use is out of pattern use, if LHS has other uses that are
>               pattern uses, we should mark the stmt itself, and not the 
> pattern
>               stmt.  */
> -          FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
> -            {
> -              if (is_gimple_debug (USE_STMT (use_p)))
> -                continue;
> -              use_stmt = USE_STMT (use_p);
> +       if (TREE_CODE (lhs) == SSA_NAME)
> +         FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
> +           {
> +             if (is_gimple_debug (USE_STMT (use_p)))
> +               continue;
> +             use_stmt = USE_STMT (use_p);
>  
> -              if (vinfo_for_stmt (use_stmt)
> -                  && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
> -                {
> -                  found = true;
> -                  break;
> -                }
> -            }
> +             if (vinfo_for_stmt (use_stmt)
> +                 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
> +               {
> +                 found = true;
> +                 break;
> +               }
> +           }
>          }
>  
>        if (!found)
> @@ -3656,6 +3657,9 @@ vectorizable_store (gimple stmt, gimple_
>      return false;
>  
>    scalar_dest = gimple_assign_lhs (stmt);
> +  if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
> +      && is_pattern_stmt_p (stmt_info))
> +    scalar_dest = TREE_OPERAND (scalar_dest, 0);
>    if (TREE_CODE (scalar_dest) != ARRAY_REF
>        && TREE_CODE (scalar_dest) != INDIRECT_REF
>        && TREE_CODE (scalar_dest) != COMPONENT_REF

Just change the if () stmt to

 if (!handled_component_p (scalar_dest)
     && TREE_CODE (scalar_dest) != MEM_REF)
   return false;

> --- gcc/tree-vect-patterns.c.jj       2011-10-18 23:52:05.000000000 +0200
> +++ gcc/tree-vect-patterns.c  2011-10-19 13:55:27.000000000 +0200
> @@ -1933,6 +1933,50 @@ vect_recog_bool_pattern (VEC (gimple, he
>        VEC_safe_push (gimple, heap, *stmts, last_stmt);
>        return pattern_stmt;
>      }
> +  else if (rhs_code == SSA_NAME
> +        && STMT_VINFO_DATA_REF (stmt_vinfo))
> +    {
> +      stmt_vec_info pattern_stmt_info;
> +      vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
> +      gcc_assert (vectype != NULL_TREE);
> +      if (!check_bool_pattern (var, loop_vinfo))
> +     return NULL;
> +
> +      rhs = adjust_bool_pattern (var, TREE_TYPE (vectype), NULL_TREE, stmts);
> +      if (TREE_CODE (lhs) == MEM_REF || TREE_CODE (lhs) == TARGET_MEM_REF)
> +     {
> +       lhs = copy_node (lhs);

We don't handle TARGET_MEM_REF in vectorizable_store, so no need to
do it here.  In fact, just unconditionally do ...

> +       TREE_TYPE (lhs) = TREE_TYPE (vectype);
> +     }
> +      else
> +     lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);

... this (wrap it in a V_C_E).  No need to special-case any
MEM_REFs.

> +      if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))

This should never be false, so you might as well build the
conversion stmt unconditionally.

> +     {
> +       tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
> +       gimple cast_stmt
> +         = gimple_build_assign_with_ops (NOP_EXPR, rhs2, rhs, NULL_TREE);
> +       STMT_VINFO_PATTERN_DEF_STMT (stmt_vinfo) = cast_stmt;
> +       rhs = rhs2;
> +     }
> +      pattern_stmt
> +     = gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
> +      pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL);
> +      set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
> +      STMT_VINFO_DATA_REF (pattern_stmt_info)
> +     = STMT_VINFO_DATA_REF (stmt_vinfo);
> +      STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info)
> +     = STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo);
> +      STMT_VINFO_DR_INIT (pattern_stmt_info) = STMT_VINFO_DR_INIT 
> (stmt_vinfo);
> +      STMT_VINFO_DR_OFFSET (pattern_stmt_info)
> +     = STMT_VINFO_DR_OFFSET (stmt_vinfo);
> +      STMT_VINFO_DR_STEP (pattern_stmt_info) = STMT_VINFO_DR_STEP 
> (stmt_vinfo);
> +      STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info)
> +     = STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo);
> +      *type_out = vectype;
> +      *type_in = vectype;
> +      VEC_safe_push (gimple, heap, *stmts, last_stmt);
> +      return pattern_stmt;
> +    }
>    else
>      return NULL;
>  }
> @@ -1949,19 +1993,22 @@ vect_mark_pattern_stmts (gimple orig_stm
>    loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (orig_stmt_info);
>    gimple def_stmt;
>  
> -  set_vinfo_for_stmt (pattern_stmt,
> -                      new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL));
> -  gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));
>    pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
> +  if (pattern_stmt_info == NULL)
> +    {
> +      pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL);
> +      set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
> +    }
> +  gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));
>  
>    STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
>    STMT_VINFO_DEF_TYPE (pattern_stmt_info)
> -     = STMT_VINFO_DEF_TYPE (orig_stmt_info);
> +    = STMT_VINFO_DEF_TYPE (orig_stmt_info);
>    STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
>    STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
>    STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
>    STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info)
> -     = STMT_VINFO_PATTERN_DEF_STMT (orig_stmt_info);
> +    = STMT_VINFO_PATTERN_DEF_STMT (orig_stmt_info);
>    if (STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info))
>      {
>        def_stmt = STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info);
> --- gcc/tree-vect-data-refs.c.jj      2011-09-20 21:43:07.000000000 +0200
> +++ gcc/tree-vect-data-refs.c 2011-10-19 14:37:44.000000000 +0200
> @@ -2752,8 +2752,23 @@ vect_analyze_data_refs (loop_vec_info lo
>  
>        /* Set vectype for STMT.  */
>        scalar_type = TREE_TYPE (DR_REF (dr));
> -      STMT_VINFO_VECTYPE (stmt_info) =
> -                get_vectype_for_scalar_type (scalar_type);
> +      STMT_VINFO_VECTYPE (stmt_info)
> +     = get_vectype_for_scalar_type (scalar_type);
> +      if (!STMT_VINFO_VECTYPE (stmt_info)
> +       && ((TYPE_PRECISION (scalar_type) == 1
> +            && TYPE_UNSIGNED (scalar_type))
> +           || TREE_CODE (scalar_type) == BOOLEAN_TYPE)
> +       && DR_IS_WRITE (dr)
> +       && loop_vinfo)
> +     {
> +       /* For bool stores use integral type with the same
> +          TYPE_MODE, but bigger precision.  vect_recog_bool_pattern
> +          can transform those into something vectorizable.  */
> +       unsigned int modesize = GET_MODE_BITSIZE (TYPE_MODE (scalar_type));
> +       scalar_type = build_nonstandard_integer_type (modesize, 1);
> +       STMT_VINFO_VECTYPE (stmt_info)
> +         = get_vectype_for_scalar_type (scalar_type);
> +     }
>        if (!STMT_VINFO_VECTYPE (stmt_info))
>          {
>            if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
> --- gcc/tree-vect-loop.c.jj   2011-09-26 14:06:52.000000000 +0200
> +++ gcc/tree-vect-loop.c      2011-10-19 14:49:18.000000000 +0200
> @@ -1,5 +1,5 @@
>  /* Loop Vectorization
> -   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
> +   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
>     Free Software Foundation, Inc.
>     Contributed by Dorit Naishlos <do...@il.ibm.com> and
>     Ira Rosen <i...@il.ibm.com>
> @@ -347,6 +347,28 @@ vect_determine_vectorization_factor (loo
>             gcc_assert (STMT_VINFO_DATA_REF (stmt_info)
>                         || is_pattern_stmt_p (stmt_info));
>             vectype = STMT_VINFO_VECTYPE (stmt_info);
> +           if (STMT_VINFO_DATA_REF (stmt_info))
> +             {
> +               struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
> +               tree scalar_type = TREE_TYPE (DR_REF (dr));
> +               /* vect_analyze_data_refs will allow bool writes through,
> +                  in order to allow vect_recog_bool_pattern to transform
> +                  those.  If they couldn't be transformed, give up now.  */
> +               if (((TYPE_PRECISION (scalar_type) == 1
> +                     && TYPE_UNSIGNED (scalar_type))
> +                    || TREE_CODE (scalar_type) == BOOLEAN_TYPE)

Shouldn't it always be possible to vectorize those?  For loads
we can assume the memory contains only 1 or 0 (we assume that for
scalar loads); for stores we can mask out all the other bits explicitly
if you add support for truncating conversions to non-mode precision
(in fact, we could support non-mode-precision vectorization that way,
though not bitfield loads or extending conversions).

So maybe that obsoletes my conditional approval ;)  Can you
investigate whether the above would work?

Thanks,
Richard.

> +                   && DR_IS_WRITE (dr)
> +                   && !is_pattern_stmt_p (stmt_info))
> +                 {
> +                   if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
> +                     {
> +                       fprintf (vect_dump,
> +                                "not vectorized: unsupported data-type ");
> +                       print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
> +                     }
> +                   return false;
> +                 }
> +             }
>           }
>         else
>           {
> --- gcc/testsuite/gcc.dg/vect/vect-cond-10.c.jj       2011-10-19 
> 15:54:42.000000000 +0200
> +++ gcc/testsuite/gcc.dg/vect/vect-cond-10.c  2011-10-19 16:00:22.000000000 
> +0200
> @@ -0,0 +1,165 @@
> +/* { dg-require-effective-target vect_cond_mixed } */
> +
> +#include "tree-vect.h"
> +
> +#define N 1024
> +float a[N], b[N], c[N], d[N];
> +_Bool k[N];
> +
> +__attribute__((noinline, noclone)) void
> +f1 (void)
> +{
> +  int i;
> +  for (i = 0; i < N; ++i)
> +    {
> +      int x = a[i] < b[i];
> +      int y = c[i] < d[i];
> +      k[i] = x & y;
> +    }
> +}
> +
> +__attribute__((noinline, noclone)) void
> +f2 (void)
> +{
> +  int i;
> +  for (i = 0; i < N; ++i)
> +    k[i] = (a[i] < b[i]) & (c[i] < d[i]);
> +}
> +
> +__attribute__((noinline, noclone)) void
> +f3 (void)
> +{
> +  int i;
> +  for (i = 0; i < N; ++i)
> +    {
> +      int x = a[i] < b[i];
> +      int y = c[i] < d[i];
> +      k[i] = x | y;
> +    }
> +}
> +
> +__attribute__((noinline, noclone)) void
> +f4 (void)
> +{
> +  int i;
> +  for (i = 0; i < N; ++i)
> +    k[i] = (a[i] < b[i]) | (c[i] < d[i]);
> +}
> +
> +__attribute__((noinline, noclone)) void
> +f5 (_Bool *p)
> +{
> +  int i;
> +  for (i = 0; i < N; ++i)
> +    {
> +      int x = a[i] < b[i];
> +      int y = c[i] < d[i];
> +      p[i] = x & y;
> +    }
> +}
> +
> +__attribute__((noinline, noclone)) void
> +f6 (_Bool *p)
> +{
> +  int i;
> +  for (i = 0; i < N; ++i)
> +    p[i] = (a[i] < b[i]) & (c[i] < d[i]);
> +}
> +
> +__attribute__((noinline, noclone)) void
> +f7 (_Bool *p)
> +{
> +  int i;
> +  for (i = 0; i < N; ++i)
> +    {
> +      int x = a[i] < b[i];
> +      int y = c[i] < d[i];
> +      p[i] = x | y;
> +    }
> +}
> +
> +__attribute__((noinline, noclone)) void
> +f8 (_Bool *p)
> +{
> +  int i;
> +  for (i = 0; i < N; ++i)
> +    p[i] = (a[i] < b[i]) | (c[i] < d[i]);
> +}
> +
> +int
> +main ()
> +{
> +  int i;
> +
> +  check_vect ();
> +
> +  for (i = 0; i < N; i++)
> +    {
> +      switch (i % 9)
> +     {
> +     case 0: asm (""); a[i] = - i - 1; b[i] = i + 1; break;
> +     case 1: a[i] = 0; b[i] = 0; break;
> +     case 2: a[i] = i + 1; b[i] = - i - 1; break;
> +     case 3: a[i] = i; b[i] = i + 7; break;
> +     case 4: a[i] = i; b[i] = i; break;
> +     case 5: a[i] = i + 16; b[i] = i + 3; break;
> +     case 6: a[i] = - i - 5; b[i] = - i; break;
> +     case 7: a[i] = - i; b[i] = - i; break;
> +     case 8: a[i] = - i; b[i] = - i - 7; break;
> +     }
> +    }
> +  for (i = 0; i < N; i++)
> +    {
> +      switch ((i / 9) % 3)
> +     {
> +     case 0: c[i] = a[i / 9]; d[i] = b[i / 9]; break;
> +     case 1: c[i] = a[i / 9 + 3]; d[i] = b[i / 9 + 3]; break;
> +     case 2: c[i] = a[i / 9 + 6]; d[i] = b[i / 9 + 6]; break;
> +     }
> +    }
> +  f1 ();
> +  for (i = 0; i < N; i++)
> +    if (k[i] != ((i % 3) == 0 && ((i / 9) % 3) == 0))
> +      abort ();
> +  __builtin_memset (k, 0, sizeof (k));
> +  f2 ();
> +  for (i = 0; i < N; i++)
> +    if (k[i] != ((i % 3) == 0 && ((i / 9) % 3) == 0))
> +      abort ();
> +  __builtin_memset (k, 0, sizeof (k));
> +  f3 ();
> +  for (i = 0; i < N; i++)
> +    if (k[i] != ((i % 3) == 0 || ((i / 9) % 3) == 0))
> +      abort ();
> +  __builtin_memset (k, 0, sizeof (k));
> +  f4 ();
> +  for (i = 0; i < N; i++)
> +    if (k[i] != ((i % 3) == 0 || ((i / 9) % 3) == 0))
> +      abort ();
> +  __builtin_memset (k, 0, sizeof (k));
> +  f5 (k);
> +  for (i = 0; i < N; i++)
> +    if (k[i] != ((i % 3) == 0 && ((i / 9) % 3) == 0))
> +      abort ();
> +  __builtin_memset (k, 0, sizeof (k));
> +  f6 (k);
> +  for (i = 0; i < N; i++)
> +    if (k[i] != ((i % 3) == 0 && ((i / 9) % 3) == 0))
> +      abort ();
> +  __builtin_memset (k, 0, sizeof (k));
> +  f7 (k);
> +  for (i = 0; i < N; i++)
> +    if (k[i] != ((i % 3) == 0 || ((i / 9) % 3) == 0))
> +      abort ();
> +  __builtin_memset (k, 0, sizeof (k));
> +  f8 (k);
> +  for (i = 0; i < N; i++)
> +    if (k[i] != ((i % 3) == 0 || ((i / 9) % 3) == 0))
> +      abort ();
> +  __builtin_memset (k, 0, sizeof (k));
> +
> +  return 0;
> +}
> +
> +/* { dg-final { scan-tree-dump-times "note: vectorized 1 loops" 8 "vect" } } 
> */
> +/* { dg-final { cleanup-tree-dump "vect" } } */
> 
>       Jakub
> 
> 

-- 
Richard Guenther <rguent...@suse.de>
SUSE / SUSE Labs
SUSE LINUX Products GmbH - Nuernberg - AG Nuernberg - HRB 16746
GF: Jeff Hawn, Jennifer Guild, Felix Imendörffer

Reply via email to