Re: [PATCH] Optimize some loops using bool types (PR tree-optimization/50596)

2011-10-16 Thread Ira Rosen
On 12 October 2011 17:54, Jakub Jelinek  wrote:
> Hi!

Hi,

>
> This patch allows vectorization of some loops that use
> bool (which is especially important now that we use bool more often
> even for stmts that weren't originally using bool in the sources),
> in particular (when bool is cast to an integer type, and the bool rhs
> has def stmts within the loop as either BIT_{AND,IOR,XOR}_EXPR,
> or just SSA_NAME assigns or bool -> another bool casts, or comparisons
> (tested recursively).  In that case the pattern recognizer transforms
> the comparisons into COND_EXPRs using suitable integer type (the same width
> as the comparison operands) and other bools to suitable integer types
> with casts added where needed.
>
> The patch doesn't yet handle vectorization of storing into a bool array,
> I'll work on that later.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux.  Ok for trunk?

OK with:

> +
> +/* Helper function of adjust_bool_pattern.  Add a cast to TYPE to a previous
> +   stmt (SSA_NAME_DEF_STMT of VAR), but moving the COND_EXPR from 
> RELATED_STMT

by moving?

> +   to PATTERN_DEF_STMT and adding a cast as RELATED_STMT.  */
> +
> +static tree
> +adjust_bool_pattern_cast (tree type, tree var)
> +{
> +  stmt_vec_info stmt_vinfo = vinfo_for_stmt (SSA_NAME_DEF_STMT (var));
> +  gimple cast_stmt, pattern_stmt;
> +
> +  gcc_assert (!STMT_VINFO_PATTERN_DEF_STMT (stmt_vinfo));
> +  pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
> +  STMT_VINFO_PATTERN_DEF_STMT (stmt_vinfo) = pattern_stmt;
> +  cast_stmt
> +    = gimple_build_assign_with_ops (NOP_EXPR,
> +                                   vect_recog_temp_ssa_var (type, NULL),
> +                                   gimple_assign_lhs (pattern_stmt),
> +                                   NULL_TREE);
> +  STMT_VINFO_RELATED_STMT (stmt_vinfo) = cast_stmt;
> +  return gimple_assign_lhs (cast_stmt);
> +}
> +
> +
> +/* Helper function of vect_recog_bool_pattern.  Do the actual 
> transformations,
> +   recursively.  VAR is an SSA_NAME that should be transformed from bool
> +   to a wider integer type, OUT_TYPE is the desired final integer type of
> +   the whole pattern, TRUEVAL should be NULL unless optimizing
> +   BIT_AND_EXPR into a COND_EXPR with one integer from one of the operands
> +   in the then_clause, STMTS is where statements with added pattern stmts
> +   should be pushed to.  */
> +
> +static tree
> +adjust_bool_pattern (tree var, tree out_type, tree trueval,
> +                    VEC (gimple, heap) **stmts)
> +{
> +  gimple stmt = SSA_NAME_DEF_STMT (var);
> +  enum tree_code rhs_code, def_rhs_code;
> +  tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
> +  location_t loc;
> +  gimple pattern_stmt, def_stmt;
> +
> +  rhs1 = gimple_assign_rhs1 (stmt);
> +  rhs2 = gimple_assign_rhs2 (stmt);
> +  rhs_code = gimple_assign_rhs_code (stmt);
> +  loc = gimple_location (stmt);
> +  switch (rhs_code)
> +    {
> +    case SSA_NAME:
> +    CASE_CONVERT:
> +      irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
> +      itype = TREE_TYPE (irhs1);
> +      pattern_stmt
> +       = gimple_build_assign_with_ops (SSA_NAME,
> +                                       vect_recog_temp_ssa_var (itype, NULL),
> +                                       irhs1, NULL_TREE);
> +      break;
> +
> +    case BIT_NOT_EXPR:
> +      irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
> +      itype = TREE_TYPE (irhs1);
> +      pattern_stmt
> +       = gimple_build_assign_with_ops (BIT_XOR_EXPR,
> +                                       vect_recog_temp_ssa_var (itype, NULL),
> +                                       irhs1, build_int_cst (itype, 1));
> +      break;
> +
> +    case BIT_AND_EXPR:
> +      /* Try to optimize x = y & (a < b ? 1 : 0); into
> +        x = (a < b ? y : 0);  */

Could you please add some more explanations here? I found it very
difficult to follow. It would be nice to have an example here (similar
to vect_recog_bool_pattern) to illustrate what these statements and
operands are.


> +      def_stmt = SSA_NAME_DEF_STMT (rhs2);
> +      def_rhs_code = gimple_assign_rhs_code (def_stmt);
> +      if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
> +       {
> +         tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
> +         irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
> +         if (TYPE_PRECISION (TREE_TYPE (irhs1))
> +             == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1
> +           {
> +             gimple tstmt;
> +             stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
> +             irhs2 = adjust_bool_pattern (rhs2, out_type, irhs1, stmts);
> +             tstmt = VEC_pop (gimple, *stmts);
> +             gcc_assert (tstmt == def_stmt);
> +             VEC_quick_push (gimple, *stmts, stmt);
> +             STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
> +               = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
> +             gcc_assert (!STMT_VINFO_

Re: [PATCH/RFA] Fix up gcc.dg/vect/pr30858.c expected output

2011-10-16 Thread Ira Rosen


gcc-patches-ow...@gcc.gnu.org wrote on 14/10/2011 04:43:48 PM:

> All,
>
> The attached patch corrects the expected output of the
> gcc.dg/vect/pr30858.c testcase.
>
> Historically it has expected the output "Unknown def-use cycle pattern."
> just once.
>
> However, recent changes to GCC for ARM targets means that vectorization
> is attempted twice once with a vector size of 128-bits and once with a
> vector size of 64-bits.  This means that the output appears more than
once.
>
> The patch works around this by making the testcase expect one or more
> instances of "Unknown def-use cycle pattern"
>
> Can someone review please?

This is OK with me.

Thanks,
Ira

>
> Thanks,
>
> Matt
>
> gcc/testsuite/ChangeLog:
>
> 2011-10-13  Matthew Gretton-Dann  
>
>   * gcc.dg/vect/pr30858.c: Update expected output for
>   architectures with multiple vector sizes.
>
> --
> Matthew Gretton-Dann
> Principal Engineer, PD Software - Tools, ARM Ltd[attachment "1110-
> fix-pr30858.txt" deleted by Ira Rosen/Haifa/IBM]



[patch] Support subchains of interleaving chains in basic block SLP

2011-10-16 Thread Ira Rosen
Hi,

This patch makes it possible to vectorize a subchain of interleaved loads in
basic block SLP (in loop vectorization this would be more complicated
because of loop peeling). This patch also swaps operands if necessary
(and possible) to make operations isomorphic.

Bootstrapped and tested on powerpc64-suse-linux.
Committed.

Ira

ChangeLog:

* tree-vect-stmts.c (vectorizable_load): For SLP without permutation
treat the first load of the node as the first element in its
interleaving chain.
* tree-vect-slp.c (vect_get_and_check_slp_defs): Swap the operands if
necessary and possible.
(vect_build_slp_tree): Add new argument.  Allow load groups of any size
in basic blocks.  Keep all the loads for further permutation check.
Use the new argument to determine if there is a permutation.  Update
the recursive calls.
(vect_supported_load_permutation_p): Allow subchains of interleaving
chains in basic block vectorization.
(vect_analyze_slp_instance): Update the call to vect_build_slp_tree.
Check load permutation based on the new parameter.
(vect_schedule_slp_instance): Don't start from the first element in
interleaving chain unless the loads are permuted.

testsuite/ChangeLog:

* gcc.dg/vect/bb-slp-29.c: New test.
Index: ChangeLog
===
--- ChangeLog   (revision 180052)
+++ ChangeLog   (working copy)
@@ -1,3 +1,21 @@
+2011-10-16 Ira Rosen  
+
+   * tree-vect-stmts.c (vectorizable_load): For SLP without permutation
+   treat the first load of the node as the first element in its
+   interleaving chain.
+   * tree-vect-slp.c (vect_get_and_check_slp_defs): Swap the operands if
+   necessary and possible.
+   (vect_build_slp_tree): Add new argument.  Allow load groups of any size
+   in basic blocks.  Keep all the loads for further permutation check.
+   Use the new argument to determine if there is a permutation.  Update
+   the recursive calls.
+   (vect_supported_load_permutation_p): Allow subchains of interleaving
+   chains in basic block vectorization.
+   (vect_analyze_slp_instance): Update the call to vect_build_slp_tree.
+   Check load permutation based on the new parameter.
+   (vect_schedule_slp_instance): Don't start from the first element in
+   interleaving chain unless the loads are permuted.
+
 2011-10-15  Richard Henderson  
 
* tree-vect-slp.c: Include langhooks.h.
Index: testsuite/gcc.dg/vect/bb-slp-29.c
===
--- testsuite/gcc.dg/vect/bb-slp-29.c   (revision 0)
+++ testsuite/gcc.dg/vect/bb-slp-29.c   (revision 0)
@@ -0,0 +1,59 @@
+/* { dg-require-effective-target vect_int } */
+
+#include 
+#include "tree-vect.h"
+
+#define A 3
+#define B 4
+#define N 256
+
+short src[N], dst[N];
+
+void foo (short * __restrict__ dst, short * __restrict__ src, int h, int 
stride, int dummy)
+{
+  int i;
+  h /= 16;
+  for (i = 0; i < h; i++)
+{
+  dst[0] = A*src[0] + B*src[1];
+  dst[1] = A*src[1] + B*src[2];
+  dst[2] = A*src[2] + B*src[3];
+  dst[3] = A*src[3] + B*src[4];
+  dst[4] = A*src[4] + B*src[5];
+  dst[5] = A*src[5] + B*src[6];
+  dst[6] = A*src[6] + B*src[7];
+  dst[7] = A*src[7] + B*src[8];
+  dst += stride;
+  src += stride;
+  if (dummy == 32)
+abort ();
+   }
+}
+
+
+int main (void)
+{
+  int i;
+
+  check_vect ();
+
+  for (i = 0; i < N; i++)
+{
+   dst[i] = 0;
+   src[i] = i;
+}
+
+  foo (dst, src, N, 8, 0);
+
+  for (i = 0; i < N/2; i++)
+{
+  if (dst[i] != A * src[i] + B * src[i+1])
+abort ();
+}
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 
"slp"  { target { vect_int_mult &&  vect_element_align } } } } */
+/* { dg-final { cleanup-tree-dump "slp" } } */
+
Index: testsuite/ChangeLog
===
--- testsuite/ChangeLog (revision 180052)
+++ testsuite/ChangeLog (working copy)
@@ -1,3 +1,7 @@
+2011-10-16 Ira Rosen  
+
+   * gcc.dg/vect/bb-slp-29.c: New test.
+
 2011-10-15  Paolo Carlini  
 
PR c++/50732
Index: tree-vect-stmts.c
===
--- tree-vect-stmts.c   (revision 180052)
+++ tree-vect-stmts.c   (working copy)
@@ -4241,6 +4241,11 @@ vectorizable_load (gimple stmt, gimple_stmt_iterat
   if (strided_load)
 {
   first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
+  if (slp
+  && !SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance)
+ && first_stmt != VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 
0))
+first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
+
   /* Check if the chain of loads is

[patch] Fix PR tree-optimization/50727

2011-10-16 Thread Ira Rosen
Hi,

This patch fixes another occurrence of the same bug as in PR 50635.

Bootstrapped and tested on powerpc64-suse-linux.
Committed.

Ira

ChangeLog:

PR tree-optimization/50727
* tree-vect-patterns.c (vect_operation_fits_smaller_type): Add
DEF_STMT to the list of statements to be replaced by the
pattern statements.

testsuite/ChangeLog:

PR tree-optimization/50727
* gcc.dg/vect/pr50727.c: New test.



Index: testsuite/gcc.dg/vect/pr50727.c
===
--- testsuite/gcc.dg/vect/pr50727.c (revision 0)
+++ testsuite/gcc.dg/vect/pr50727.c (revision 0)
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+
+typedef unsigned char uint8_t;
+typedef unsigned long uint32_t;
+void
+f0a (uint32_t * __restrict__ result, uint32_t * arg2,
+ uint8_t * __restrict__ arg4)
+{
+  int idx;
+  for (idx = 0; idx < 429; idx += 1)
+{
+  uint32_t temp_9;
+  uint32_t temp_11;
+  temp_9 = ((-19 | arg4[idx]) >> arg2[idx]);
+  temp_11 = (((-19 ^ arg4[idx]) & arg2[idx]) ^ temp_9);
+  result[idx] = temp_11;
+}
+}
+
+/* { dg-final { cleanup-tree-dump "vect" } } */
+
Index: tree-vect-patterns.c
===
--- tree-vect-patterns.c(revision 180054)
+++ tree-vect-patterns.c(working copy)
@@ -1001,6 +1001,7 @@ vect_operation_fits_smaller_type (gimple stmt, tre
   || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type)
 return false;

+ VEC_safe_push (gimple, heap, *stmts, def_stmt);
   oprnd = gimple_assign_lhs (new_stmt);
 }
   else


[patch] Update gcc.dg/vect/vect-21.c

2011-10-17 Thread Ira Rosen
Hi,

With Jakub's patch for bool types the 3 loops in gcc.dg/vect/vect-21.c
are now vectorizable on targets that support vector conditions.

Tested on powerpc64-suse-linux.
Committed.

Ira

testsuite/ChangeLog:

   * gcc.dg/vect/vect-21.c: Expect the loops to get vectorized on
   targets that support vector condition.

Index: testsuite/gcc.dg/vect/vect-21.c
===
--- testsuite/gcc.dg/vect/vect-21.c (revision 180075)
+++ testsuite/gcc.dg/vect/vect-21.c (working copy)
@@ -123,7 +123,7 @@ int main (void)
   return main1 ();
 }

-/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" {
xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" {
target vect_condition } } } */
 /* { dg-final { scan-tree-dump-times "Vectorizing an unaligned
access" 0 "vect" } } */

 /* { dg-final { cleanup-tree-dump "vect" } } */


[patch] Fix gcc.dg/vect/vect-114.c (PR 50746)

2011-10-17 Thread Ira Rosen
Hi,

vect-114.c doesn't have misaligned accesses, so there is no need for
vect_hw_misalign.

Tested on powerpc64-suse-linux.
Committed.

Ira

testsuite/ChangeLog:

PR tree-optimization/50746
* gcc.dg/vect/vect-114.c: Remove vect_hw_misalign.

Index: testsuite/gcc.dg/vect/vect-114.c
===
--- testsuite/gcc.dg/vect/vect-114.c(revision 180104)
+++ testsuite/gcc.dg/vect/vect-114.c(working copy)
@@ -34,7 +34,7 @@ int main (void)
   return main1 ();
 }

-/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" {
target { ! { vect_perm && vect_hw_misalign } } } } } */
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" {
target { vect_perm && vect_hw_misalign } } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" {
target { ! { vect_perm  } } } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" {
target vect_perm } } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */


Re: [patch] Support vectorization of widening shifts

2011-10-18 Thread Ira Rosen
On 18 October 2011 11:43, Jakub Jelinek  wrote:
> On Tue, Oct 18, 2011 at 11:39:22AM +0200, Ira Rosen wrote:
>> On 2 October 2011 10:30, Ira Rosen  wrote:
>> > On 29 September 2011 17:30, Ramana Radhakrishnan
>> >  wrote:
>> >> On 19 September 2011 08:54, Ira Rosen  wrote:
>> >>
>> >>>
>> >>> Bootstrapped on powerpc64-suse-linux, tested on powerpc64-suse-linux
>> >>> and arm-linux-gnueabi
>> >>> OK for mainline?
>> >>
>> >> Sorry I missed this patch. Is there any reason why we need unspecs in
>> >> this case ? Can't this be represented by subregs and zero/ sign
>> >> extensions in RTL without the UNSPECs ?
>>
>> I committed the attached patch with Ramana's solution for testing
>
>> +/* Detect widening shift pattern:
>>
>> +   type a_t;
>> +   TYPE a_T, res_T;
>> +
>> +   S1 a_t = ;
>> +   S2 a_T = (TYPE) a_t;
>> +   S3 res_T = a_T << CONST;
>> +
>> +  where type 'TYPE' is at least double the size of type 'type'.
>> +
>> +  Also detect unsgigned cases:
>
> unsigned

Thanks, I'll fix this.

Ira

>
>        Jakub
>


Re: [RFC PATCH] SLP vectorize calls

2011-10-21 Thread Ira Rosen
On 20 October 2011 23:50, Jakub Jelinek  wrote:
> Hi!

Hi,

>
> While looking at *.vect dumps from Polyhedron, I've noticed the lack
> of SLP vectorization of builtin calls.
>
> This patch is an attempt to handle at least 1 and 2 operand builtin calls
> (SLP doesn't handle ternary stmts either yet),

This is on the top of my todo list :).

> where all the types are the
> same.  E.g. it can handle
> extern float copysignf (float, float);
> extern float sqrtf (float);
> float a[8], b[8], c[8], d[8];
>
> void
> foo (void)
> {
>  a[0] = copysignf (b[0], c[0]) + 1.0f + sqrtf (d[0]);
>  a[1] = copysignf (b[1], c[1]) + 2.0f + sqrtf (d[1]);
>  a[2] = copysignf (b[2], c[2]) + 3.0f + sqrtf (d[2]);
>  a[3] = copysignf (b[3], c[3]) + 4.0f + sqrtf (d[3]);
>  a[4] = copysignf (b[4], c[4]) + 5.0f + sqrtf (d[4]);
>  a[5] = copysignf (b[5], c[5]) + 6.0f + sqrtf (d[5]);
>  a[6] = copysignf (b[6], c[6]) + 7.0f + sqrtf (d[6]);
>  a[7] = copysignf (b[7], c[7]) + 8.0f + sqrtf (d[7]);
> }
> and compile it into:
>        vmovaps .LC0(%rip), %ymm0
>        vandnps b(%rip), %ymm0, %ymm1
>        vandps  c(%rip), %ymm0, %ymm0
>        vorps   %ymm0, %ymm1, %ymm0
>        vsqrtps d(%rip), %ymm1
>        vaddps  %ymm1, %ymm0, %ymm0
>        vaddps  .LC1(%rip), %ymm0, %ymm0
>        vmovaps %ymm0, a(%rip)
> I've bootstrapped/regtested it on x86_64-linux and i686-linux, but
> am not 100% sure about all the changes, e.g. that
> || PURE_SLP_STMT (stmt_info) part.
>
> 2011-10-20  Jakub Jelinek  
>
>        * tree-vect-stmts.c (vectorizable_call): Add SLP_NODE argument.
>        Handle vectorization of SLP calls.
>        (vect_analyze_stmt): Adjust caller, add call to it for SLP too.
>        (vect_transform_stmt): Adjust vectorizable_call caller, remove
>        assertion.
>        * tree-vect-slp.c (vect_get_and_check_slp_defs): Handle one
>        and two argument calls too.
>        (vect_build_slp_tree): Allow CALL_EXPR.
>        (vect_get_slp_defs): Handle calls.
>
> --- gcc/tree-vect-stmts.c.jj    2011-10-20 14:13:34.0 +0200
> +++ gcc/tree-vect-stmts.c       2011-10-20 18:02:43.0 +0200
> @@ -1483,7 +1483,8 @@ vectorizable_function (gimple call, tree
>    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
>
>  static bool
> -vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt)
> +vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
> +                  slp_tree slp_node)
>  {
>   tree vec_dest;
>   tree scalar_dest;
> @@ -1494,6 +1495,7 @@ vectorizable_call (gimple stmt, gimple_s
>   int nunits_in;
>   int nunits_out;
>   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
> +  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
>   tree fndecl, new_temp, def, rhs_type;
>   gimple def_stmt;
>   enum vect_def_type dt[3]
> @@ -1505,19 +1507,12 @@ vectorizable_call (gimple stmt, gimple_s
>   size_t i, nargs;
>   tree lhs;
>
> -  /* FORNOW: unsupported in basic block SLP.  */
> -  gcc_assert (loop_vinfo);
> -
> -  if (!STMT_VINFO_RELEVANT_P (stmt_info))
> +  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
>     return false;
>
>   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
>     return false;
>
> -  /* FORNOW: SLP not supported.  */
> -  if (STMT_SLP_TYPE (stmt_info))
> -    return false;
> -
>   /* Is STMT a vectorizable call?   */
>   if (!is_gimple_call (stmt))
>     return false;
> @@ -1558,7 +1553,7 @@ vectorizable_call (gimple stmt, gimple_s
>       if (!rhs_type)
>        rhs_type = TREE_TYPE (op);
>
> -      if (!vect_is_simple_use_1 (op, loop_vinfo, NULL,
> +      if (!vect_is_simple_use_1 (op, loop_vinfo, bb_vinfo,
>                                 &def_stmt, &def, &dt[i], &opvectype))
>        {
>          if (vect_print_dump_info (REPORT_DETAILS))
> @@ -1620,7 +1615,13 @@ vectorizable_call (gimple stmt, gimple_s
>
>   gcc_assert (!gimple_vuse (stmt));
>
> -  if (modifier == NARROW)
> +  if (slp_node || PURE_SLP_STMT (stmt_info))
> +    {
> +      if (modifier != NONE)
> +       return false;
> +      ncopies = 1;
> +    }

If you want to bail out if it's SLP and modifier != NONE, this check
is not enough. PURE_SLP means the stmt is not used outside the SLP
instance, so for hybrid SLP stmts (those that have uses outside SLP)
this check will not work. You need

  if (modifier != NONE && STMT_SLP_TYPE (stmt_info))
 return false;

But I wonder why not allow different type sizes? I see that we fail in
such cases in vectorizable_conversion too, but I think we should
support this as well.

> +  else if (modifier == NARROW)
>     ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
>   else
>     ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
> @@ -1659,6 +1660,43 @@ vectorizable_call (gimple stmt, gimple_s
>          else
>            VEC_truncate (tree, vargs, 0);
>
> +         if (slp_node)
> +           {
> +             VEC(tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
> +
> +             gcc_

Re: [RFC PATCH] SLP vectorize calls

2011-10-21 Thread Ira Rosen
On 21 October 2011 14:52, Jakub Jelinek  wrote:
> On Fri, Oct 21, 2011 at 02:37:06PM +0200, Ira Rosen wrote:
>> > @@ -1620,7 +1615,13 @@ vectorizable_call (gimple stmt, gimple_s
>> >
>> >   gcc_assert (!gimple_vuse (stmt));
>> >
>> > -  if (modifier == NARROW)
>> > +  if (slp_node || PURE_SLP_STMT (stmt_info))
>> > +    {
>> > +      if (modifier != NONE)
>> > +       return false;
>> > +      ncopies = 1;
>> > +    }
>>
>> If you want to bail out if it's SLP and modifier != NONE, this check
>> is not enough. PURE_SLP means the stmt is not used outside the SLP
>> instance, so for hybrid SLP stmts (those that have uses outside SLP)
>> this check will not work. You need
>>
>>   if (modifier != NONE && STMT_SLP_TYPE (stmt_info))
>>      return false;
>
> I just blindly copied what vectorizable_operation does, without
> too much understanding what PURE_SLP_STMT or STMT_SLP_TYPE etc. mean.
> Didn't get that far.
> But modifier != NONE && something would sometimes allow modifier != NONE
> through, which at least the current code isn't prepared to handle.
> Did you mean || instead?

But it's OK to allow modifier != NONE if it's not SLP, so we need &&, no?
Something like:

if (modifier != NONE && STMT_SLP_TYPE (stmt_info))
   return false;

if (slp_node || PURE_SLP_STMT (stmt_info))
   ncopies = 1;
else if (modifier == NARROW)
   ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
else
   ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

>
>> But I wonder why not allow different type sizes? I see that we fail in
>> such cases in vectorizable_conversion too, but I think we should
>> support this as well.
>
> Merely because I don't know SLP enough, vectorizable_operation also
> handles just same size to same size, so I didn't have good examples
> on how to do it.  For loops narrowing or widening operations are
> handled through ncopies != 1, but for SLP it seems it is always
> asserted it is 1...

There are vectorizable_type_promotion/demotion, and for the rest the
copies are "hidden" inside multiple vector operands that you get from
vect_get_vec_defs. But, of course, there is not need to handle
modifier == NARROW for SLP at the moment. I was just wondering out
loud.

Ira

>
>        Jakub
>


Re: [RFC PATCH] SLP vectorize calls

2011-10-21 Thread Ira Rosen
On 21 October 2011 16:25, Jakub Jelinek  wrote:
> On Fri, Oct 21, 2011 at 03:44:11PM +0200, Ira Rosen wrote:
>> But it's OK to allow modifier != NONE if it's not SLP, so we need &&, no?
>
> Well, in my patch that check was guarded by the if (slp_node ...),
> so presumably it would allow modifier == NARROW vectorization in the loops
> (otherwise some testcases would fail I'd hope).

The problem with that is that slp_node can be NULL but it can still be
an SLP stmt (as you probably have guessed judging by the following
questions ;))

>
> Is gcc_assert ((slp_node != NULL) == (STMT_SLP_TYPE (stmt_info) != 0));
> always true?

No.

> If not, when it is not?

STMT_SLP_TYPE (stmt_info) != 0 may mean HYBRID_SLP_STMT, meaning that
we are vectorizing the stmt both as SLP and as regular loop
vectorization. So in the regular loop transformation of a hybrid stmt
(STMT_SLP_TYPE (stmt_info) != 0) doesn't (entail slp_node != NULL).

The other direction is always true.

> When would be slp_node == NULL
> and PURE_SLP_STMT true?

In the analysis of loop SLP. In loop SLP we analyze all the stmts of
the loop in their original order (and not as in BB SLP where we just
analyze SLP nodes). A stmt can belong to more than one SLP node, and
we may also need to vectorize it in a regular loop-vectorization way
at the same time. So, during the analysis we don't have stmt's SLP
node. (Note that during the analysis we need to know ncopies only to
verify that the operation is supported and for cost estimation).
And this is another case when 'if (STMT_SLP_TYPE (stmt_info) != 0)
then (slp_node != NULL)' is false.

I hope this makes sense.
Ira

>
>        Jakub
>


[wwwdocs][committed] Update vectorizer's webpage

2011-10-23 Thread Ira Rosen
Hi,

I committed the attached update.

Ira
? yy
cvs diff: Diffing .
Index: vectorization.html
===
RCS file: /cvs/gcc/wwwdocs/htdocs/projects/tree-ssa/vectorization.html,v
retrieving revision 1.27
diff -r1.27 vectorization.html
9,12c9,10
< The goal of this project is to develop a loop vectorizer in
< GCC, based on the tree-ssa framework. This
< work is taking place in the autovect-branch, and is merged periodically
< to mainline.
---
> The goal of this project is to develop a loop and basic block 
> vectorizer in
> GCC, based on the tree-ssa framework.
36a35,69
>   2011-10-23
>  
>  
> Vectorization of reduction in loop SLP.
> Both  multiple reduction cycles and 
>  reduction chains are supported. 
> Various  basic block vectorization (SLP)
> improvements, such as
> better data dependence analysis, support of misaligned accesses
> and multiple types, cost model.
> Detection of vector size:
>  "http://gcc.gnu.org/ml/gcc-patches/2010-10/msg00441.html";>
> http://gcc.gnu.org/ml/gcc-patches/2010-10/msg00441.html.
> Vectorization of loads with  negative 
> step.
> Improved realignment scheme: 
>  "http://gcc.gnu.org/ml/gcc-patches/2010-06/msg02301.html";>
> http://gcc.gnu.org/ml/gcc-patches/2010-06/msg02301.html.
> A new built-in, 
> __builtin_assume_aligned, has been added,
> through which the compiler can be hinted about pointer 
> alignment.
> Support of  strided accesses using
> memory instructions that have
> the interleaving "built in", such as NEON's vldN and vstN.
> The vectorizer now attempts to reduce over-promotion of operands 
> in some vector
> operations:  "http://gcc.gnu.org/ml/gcc-patches/2011-07/msg01472.html";>
> http://gcc.gnu.org/ml/gcc-patches/2011-07/msg01472.html.
>  Widening shifts are now detected and 
> vectorized
> if supported by the target.
> Vectorization of conditions with  mixed 
> types.
> Support of loops with  bool.
> 
> 
> 
> 
> 
44c77
< other then reduction cycles in nested loops) (2009-06-16)
---
> other than reduction cycles in nested loops) (2009-06-16)
82c115,116
<     to this project include Revital Eres, Richard Guenther, and Ira Rosen.
---
> to this project include Revital Eres, Richard Guenther, Jakub Jelinek, 
> Michael Matz,
> Richard Sandiford, and Ira Rosen.
279c313
< example11: 
---
> example11:
323d356
<  
341d373
< 
356d387
< 
361d391
< 
371a402,498
> 
> 
> example18: Simple reduction in SLP:
> 
> int sum1;
> int sum2;
> int a[128];
> void foo (void)
> {
>   int i;
> 
>   for (i = 0; i < 64; i++)
> {
>   sum1 += a[2*i];
>   sum2 += a[2*i+1];
> }
> }
> 
> 
> example19: Reduction chain in SLP:
> 
> int sum;
> int a[128];
> void foo (void)
> {
>   int i;
> 
>   for (i = 0; i < 64; i++)
> {
>   sum += a[2*i];
>   sum += a[2*i+1];
> }
> }
> 
> 
> example20: Basic block SLP with
> multiple types, loads with different offsets, misaligned load,
> and not-affine accesses:
> 
> void foo (int * __restrict__ dst, short * __restrict__ src,
>   int h, int stride, short A, short B)
> {
>   int i;
>   for (i = 0; i < h; i++)
> {
>   dst[0] += A*src[0] + B*src[1];
>   dst[1] += A*src[1] + B*src[2];
>   dst[2] += A*src[2] + B*src[3];
>   dst[3] += A*src[3] + B*src[4];
>   dst[4] += A*src[4] + B*src[5];
>   dst[5] += A*src[5] + B*src[6];
>   dst[6] += A*src[6] + B*src[7];
>   dst[7] += A*src[7] + B*src[8];
>   dst += stride;
>   src += stride;
> }
> }
> 
> 
> example21: Backward access:
> 
> int foo (int *b, int n)
> {
>   int i, a = 0;
> 
>   for (i = n-1; i ≥ 0; i--)
> a += b[i];
> 
>   return a;
> }
> 
> 
> example22: Alignment hints:
> 
> void foo (int *out1, int *in1, int *in2, int n)
> {
>   int i;
> 
>   out1 = __builtin_assume_aligned (out1, 32, 16);
>   in1 = __builtin_assume_aligned (in1, 32, 16);
>   in2 = __builtin_assume_aligned (in2, 32, 0);
> 
>   for (i = 0; i < n; i++)
> out1[i] = in1[i] * in2[i];
> }
> 
> 
> example23: Widening shift:
> 
> void foo (unsigned short *src, unsigned int *dst)
> {
>   int i;
> 
>   for (i = 0; i &l

[patch] SLP data dependence testing - PR 50819

2011-10-23 Thread Ira Rosen
Hi,

When there is a pair of data-refs with unknown dependence in basic block
SLP we currently require all the loads in the basic block to be before
all the stores in order to avoid load after store dependencies. But
this is too conservative. It's enough to check that in the pairs of
loads and stores with unknown and known dependence, the load comes
first. This is already done for the known case. This patch adds such
check for unknown dependencies and removes
vect_bb_vectorizable_with_dependencies.

Bootstrapped and tested on powerpc64-suse-linux.
Committed.

Ira

ChangeLog:

PR tree-optimization/50819
* tree-vectorizer.h (vect_analyze_data_ref_dependences): Remove
the last argument.
* tree-vect-loop.c (vect_analyze_loop_2): Update call to
vect_analyze_data_ref_dependences.
* tree-vect-data-refs.c (vect_analyze_data_ref_dependence): Remove
the last argument.  Check load-after-store dependence for unknown
dependencies in basic blocks.
(vect_analyze_data_ref_dependences): Update call to
vect_analyze_data_ref_dependences.
* tree-vect-patterns.c (vect_recog_widen_shift_pattern): Fix typo.
* tree-vect-slp.c (vect_bb_vectorizable_with_dependencies): Remove.
(vect_slp_analyze_bb_1): Update call to
vect_analyze_data_ref_dependences.  Don't call
vect_bb_vectorizable_with_dependencies.

testsuite/ChangeLog:

PR tree-optimization/50819
* g++.dg/vect/vect.exp: Set target dependent flags for slp-* tests.
* g++.dg/vect/slp-pr50819.cc: New test.
Index: ChangeLog
===
--- ChangeLog   (revision 180333)
+++ ChangeLog   (working copy)
@@ -1,3 +1,21 @@
+2011-10-23  Ira Rosen  
+
+   PR tree-optimization/50819
+   * tree-vectorizer.h (vect_analyze_data_ref_dependences): Remove
+   the last argument.
+   * tree-vect-loop.c (vect_analyze_loop_2): Update call to
+   vect_analyze_data_ref_dependences.
+   * tree-vect-data-refs.c (vect_analyze_data_ref_dependence): Remove
+   the last argument.  Check load-after-store dependence for unknown
+   dependencies in basic blocks.
+   (vect_analyze_data_ref_dependences): Update call to
+   vect_analyze_data_ref_dependences.
+   * tree-vect-patterns.c (vect_recog_widen_shift_pattern): Fix typo.
+   * tree-vect-slp.c (vect_bb_vectorizable_with_dependencies): Remove.
+   (vect_slp_analyze_bb_1): Update call to
+   vect_analyze_data_ref_dependences.  Don't call
+   vect_bb_vectorizable_with_dependencies.
+
 2011-10-22  David S. Miller  
 
* config/sparc/sparc.h (SECONDARY_INPUT_RELOAD_CLASS,
Index: testsuite/ChangeLog
===
--- testsuite/ChangeLog (revision 180333)
+++ testsuite/ChangeLog (working copy)
@@ -1,3 +1,9 @@
+2011-10-23  Ira Rosen  
+
+   PR tree-optimization/50819
+   * g++.dg/vect/vect.exp: Set target dependent flags for slp-* tests.
+   * g++.dg/vect/slp-pr50819.cc: New test.
+
 2011-10-21  Paolo Carlini  
 
PR c++/45385
Index: testsuite/g++.dg/vect/vect.exp
===
--- testsuite/g++.dg/vect/vect.exp  (revision 180333)
+++ testsuite/g++.dg/vect/vect.exp  (working copy)
@@ -42,12 +42,6 @@ set DEFAULT_VECTCFLAGS ""
 # These flags are used for all targets.
 lappend DEFAULT_VECTCFLAGS "-O2" "-ftree-vectorize" "-fno-vect-cost-model"
 
-set VECT_SLP_CFLAGS $DEFAULT_VECTCFLAGS
-
-lappend DEFAULT_VECTCFLAGS "-fdump-tree-vect-details"
-lappend VECT_SLP_CFLAGS "-fdump-tree-slp-details"
-
-
 # Skip these tests for targets that do not support generating vector
 # code.  Set additional target-dependent vector flags, which can be
 # overridden by using dg-options in individual tests.
@@ -55,6 +49,11 @@ if ![check_vect_support_and_set_flags] {
 return
 }
 
+set VECT_SLP_CFLAGS $DEFAULT_VECTCFLAGS
+
+lappend DEFAULT_VECTCFLAGS "-fdump-tree-vect-details"
+lappend VECT_SLP_CFLAGS "-fdump-tree-slp-details"
+
 # Initialize `dg'.
 dg-init
 
Index: testsuite/g++.dg/vect/slp-pr50819.cc
===
--- testsuite/g++.dg/vect/slp-pr50819.cc(revision 0)
+++ testsuite/g++.dg/vect/slp-pr50819.cc(revision 0)
@@ -0,0 +1,53 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_float } */
+
+typedef float Value;
+
+struct LorentzVector
+{
+
+  LorentzVector(Value x=0, Value  y=0, Value  z=0, Value  t=0) :
+theX(x),theY(y),theZ(z),theT(t){}
+  LorentzVector & operator+=(const LorentzVector & a) {
+theX += a.theX;
+theY += a.theY;
+theZ += a.theZ;
+theT += a.theT;
+return *this;
+  }
+
+  Value theX;
+  Value theY;
+  Value theZ;
+  Value theT;
+}  __attribute__ ((aligned(16)));
+
+

[patch] Partial SLP - PR 50730

2011-10-24 Thread Ira Rosen
Hi,

With this patch we are able to stop basic block analysis in case of
unsupported data-ref and still vectorize the first part of the basic
block.

Bootstrapped and tested on powerpc64-suse-linux.
Committed.

Ira

ChangeLog:

PR tree-optimization/50730
* tree-vect-data-refs.c (vect_analyze_data_refs): Stop basic block
analysis if encountered unsupported data-ref.

testsuite/ChangeLog:

PR tree-optimization/50730
* gcc.dg/vect/no-tree-sra-bb-slp-pr50730.c: New test.
* gcc.dg/vect/vect.exp: Run no-tree-sra-bb-slp* tests with
-fno-tree-sra and SLP flags.

Index: tree-vect-data-refs.c
===
--- tree-vect-data-refs.c   (revision 180364)
+++ tree-vect-data-refs.c   (working copy)
@@ -2524,7 +2524,7 @@ vect_analyze_data_refs (loop_vec_info loop_vinfo,
   VEC (data_reference_p, heap) *datarefs;
   struct data_reference *dr;
   tree scalar_type;
-  bool res;
+  bool res, stop_bb_analysis = false;

   if (vect_print_dump_info (REPORT_DETAILS))
 fprintf (vect_dump, "=== vect_analyze_data_refs ===\n");
@@ -2579,12 +2579,19 @@ vect_analyze_data_refs (loop_vec_info loop_vinfo,
 {
   if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
fprintf (vect_dump, "not vectorized: unhandled data-ref ");
+
   return false;
 }

   stmt = DR_STMT (dr);
   stmt_info = vinfo_for_stmt (stmt);

+  if (stop_bb_analysis)
+{
+  STMT_VINFO_VECTORIZABLE (stmt_info) = false;
+  continue;
+}
+
   /* Check that analysis of the data-ref succeeded.  */
   if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
   || !DR_STEP (dr))
@@ -2595,6 +2602,13 @@ vect_analyze_data_refs (loop_vec_info loop_vinfo,
   print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
 }

+  if (bb_vinfo)
+{
+  STMT_VINFO_VECTORIZABLE (stmt_info) = false;
+  stop_bb_analysis = true;
+  continue;
+}
+
   return false;
 }

@@ -2603,7 +2617,15 @@ vect_analyze_data_refs (loop_vec_info loop_vinfo,
   if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
 fprintf (vect_dump, "not vectorized: base addr of dr is a "
  "constant");
-  return false;
+
+  if (bb_vinfo)
+{
+  STMT_VINFO_VECTORIZABLE (stmt_info) = false;
+  stop_bb_analysis = true;
+  continue;
+}
+
+   return false;
 }

   if (TREE_THIS_VOLATILE (DR_REF (dr)))
@@ -2613,6 +2635,14 @@ vect_analyze_data_refs (loop_vec_info loop_vinfo,
   fprintf (vect_dump, "not vectorized: volatile type ");
   print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
 }
+
+  if (bb_vinfo)
+{
+  STMT_VINFO_VECTORIZABLE (stmt_info) = false;
+  stop_bb_analysis = true;
+  continue;
+}
+
   return false;
 }

@@ -2628,6 +2658,14 @@ vect_analyze_data_refs (loop_vec_info loop_vinfo,
"exception ");
   print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
 }
+
+  if (bb_vinfo)
+{
+  STMT_VINFO_VECTORIZABLE (stmt_info) = false;
+  stop_bb_analysis = true;
+  continue;
+}
+
   return false;
 }

@@ -2745,6 +2783,14 @@ vect_analyze_data_refs (loop_vec_info loop_vinfo,
"not vectorized: more than one data ref in stmt: ");
   print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
 }
+
+  if (bb_vinfo)
+{
+  STMT_VINFO_VECTORIZABLE (stmt_info) = false;
+  stop_bb_analysis = true;
+  continue;
+}
+
   return false;
 }

@@ -2769,6 +2815,7 @@ vect_analyze_data_refs (loop_vec_info loop_vinfo,
 {
   /* Mark the statement as not vectorizable.  */
   STMT_VINFO_VECTORIZABLE (stmt_info) = false;
+  stop_bb_analysis = true;
   continue;
 }
   else
Index: testsuite/gcc.dg/vect/no-tree-sra-bb-slp-pr50730.c
===
--- testsuite/gcc.dg/vect/no-tree-sra-bb-slp-pr50730.c  (revision 0)
+++ testsuite/gcc.dg/vect/no-tree-sra-bb-slp-pr50730.c  (revision 0)
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_float } */
+
+typedef __complex__ float Value;
+typedef struct {
+  Value a[16 / sizeof (Value)];
+} A;
+
+A sum(A a,A b)
+{
+  a.a[0]+=b.a[0];
+  a.a[1]+=b.a[1];
+  return a;
+}
+
+/* { dg-final { scan-tree-dump-times "not vectorized: more than one
data ref in stmt" 0 "slp" } } */
+/* { dg-final { cleanup-tree-dump "slp" } } */
Index: testsuite/gcc.dg/vect/vec

Re: [PATCH] Pattern recognize shifts with different rhs1/rhs2 types

2011-10-30 Thread Ira Rosen
On 28 October 2011 20:44, Jakub Jelinek  wrote:
> Hi!

Hi,

>
> This patch implements what I've talked about, with this we can now
> with -mavx2 as well as -mxop vectorize long long/unsigned long long
> shifts by int or long long/unsigned long long shifts by long long
> (where the FE casts it to int first).  Already covered by the *vshift-*
> testcases I've committed recently (eyeballed for -mxop plus link tested,
> for -mavx2 tested on sde).
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

OK.

Thanks,
Ira

>
> 2011-10-28  Jakub Jelinek  
>
>        * tree-vectorizer.h (NUM_PATTERNS): Bump to 9.
>        * tree-vect-patterns.c (vect_recog_vector_vector_shift_pattern): New
>        function.
>        (vect_vect_recog_func_ptrs): Add it.
>
> --- gcc/tree-vectorizer.h.jj    2011-10-27 08:42:51.0 +0200
> +++ gcc/tree-vectorizer.h       2011-10-28 16:26:30.0 +0200
> @@ -902,7 +902,7 @@ extern void vect_slp_transform_bb (basic
>    Additional pattern recognition functions can (and will) be added
>    in the future.  */
>  typedef gimple (* vect_recog_func_ptr) (VEC (gimple, heap) **, tree *, tree 
> *);
> -#define NUM_PATTERNS 8
> +#define NUM_PATTERNS 9
>  void vect_pattern_recog (loop_vec_info);
>
>  /* In tree-vectorizer.c.  */
> --- gcc/tree-vect-patterns.c.jj 2011-10-26 14:19:11.0 +0200
> +++ gcc/tree-vect-patterns.c    2011-10-28 17:41:26.0 +0200
> @@ -51,6 +51,8 @@ static gimple vect_recog_over_widening_p
>                                                  tree *);
>  static gimple vect_recog_widen_shift_pattern (VEC (gimple, heap) **,
>                                        tree *, tree *);
> +static gimple vect_recog_vector_vector_shift_pattern (VEC (gimple, heap) **,
> +                                                     tree *, tree *);
>  static gimple vect_recog_mixed_size_cond_pattern (VEC (gimple, heap) **,
>                                                  tree *, tree *);
>  static gimple vect_recog_bool_pattern (VEC (gimple, heap) **, tree *, tree 
> *);
> @@ -61,6 +63,7 @@ static vect_recog_func_ptr vect_vect_rec
>        vect_recog_pow_pattern,
>        vect_recog_over_widening_pattern,
>        vect_recog_widen_shift_pattern,
> +       vect_recog_vector_vector_shift_pattern,
>        vect_recog_mixed_size_cond_pattern,
>        vect_recog_bool_pattern};
>
> @@ -1439,6 +1442,133 @@ vect_recog_widen_shift_pattern (VEC (gim
>   return pattern_stmt;
>  }
>
> +/* Detect a vector by vector shift pattern that wouldn't be otherwise
> +   vectorized:
> +
> +   type a_t;
> +   TYPE b_T, res_T;
> +
> +   S1 a_t = ;
> +   S2 b_T = ;
> +   S3 res_T = b_T op a_t;
> +
> +  where type 'TYPE' is a type with different size than 'type',
> +  and op is <<, >> or rotate.
> +
> +  Also detect cases:
> +
> +   type a_t;
> +   TYPE b_T, c_T, res_T;
> +
> +   S0 c_T = ;
> +   S1 a_t = (type) c_T;
> +   S2 b_T = ;
> +   S3 res_T = b_T op a_t;
> +
> +  Input/Output:
> +
> +  * STMTS: Contains a stmt from which the pattern search begins,
> +    i.e. the shift/rotate stmt.  The original stmt (S3) is replaced
> +    with a shift/rotate which has same type on both operands, in the
> +    second case just b_T op c_T, in the first case with added cast
> +    from a_t to c_T in STMT_VINFO_PATTERN_DEF_STMT.
> +
> +  Output:
> +
> +  * TYPE_IN: The type of the input arguments to the pattern.
> +
> +  * TYPE_OUT: The type of the output of this pattern.
> +
> +  * Return value: A new stmt that will be used to replace the shift/rotate
> +    S3 stmt.  */
> +
> +static gimple
> +vect_recog_vector_vector_shift_pattern (VEC (gimple, heap) **stmts,
> +                                       tree *type_in, tree *type_out)
> +{
> +  gimple last_stmt = VEC_pop (gimple, *stmts);
> +  tree oprnd0, oprnd1, lhs, var;
> +  gimple pattern_stmt, def_stmt;
> +  enum tree_code rhs_code;
> +  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
> +  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
> +  enum vect_def_type dt;
> +  tree def;
> +
> +  if (!is_gimple_assign (last_stmt))
> +    return NULL;
> +
> +  rhs_code = gimple_assign_rhs_code (last_stmt);
> +  switch (rhs_code)
> +    {
> +    case LSHIFT_EXPR:
> +    case RSHIFT_EXPR:
> +    case LROTATE_EXPR:
> +    case RROTATE_EXPR:
> +      break;
> +    default:
> +      return NULL;
> +    }
> +
> +  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
> +    return NULL;
> +
> +  lhs = gimple_assign_lhs (last_stmt);
> +  oprnd0 = gimple_assign_rhs1 (last_stmt);
> +  oprnd1 = gimple_assign_rhs2 (last_stmt);
> +  if (TREE_CODE (oprnd0) != SSA_NAME
> +      || TREE_CODE (oprnd1) != SSA_NAME
> +      || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
> +      || TYPE_PRECISION (TREE_TYPE (oprnd1))
> +        != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (oprnd1)))
> +      || TYPE_PRECISION (TREE_TYPE (lhs))
> +        != TYPE_PRECISION (TREE_TYPE (oprnd0)))
> +    return NULL;
> +
> +  if (!vect_is_simple_use (op

Re: [PATCH] Re: vector shift regression on sparc

2011-10-31 Thread Ira Rosen
On 31 October 2011 11:53, Jakub Jelinek  wrote:
> On Sun, Oct 30, 2011 at 12:38:32AM -0400, David Miller wrote:
>> gcc.dg/pr48616.c segfaults on sparc as of a day or two ago
>>
>> vectorizable_shift() crashes because op1_vectype is NULL and
>> we hit this code path:
>>
>>   /* Vector shifted by vector.  */
>>   if (!scalar_shift_arg)
>>     {
>>       optab = optab_for_tree_code (code, vectype, optab_vector);
>>       if (vect_print_dump_info (REPORT_DETAILS))
>>       fprintf (vect_dump, "vector/vector shift/rotate found.");
>> =>    if (TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
>>
>> dt[1] is vect_external_def and slp_node is non-NULL.
>>
>> Indeed, when the 'dt' arg to vect_is_simple_use_1() is
>> vect_external_def *vectype will be set to NULL.
>
> Here is a fix for that (and other issues that show up on these
> testcases with -O3 -mxop if I disable all vector/scalar shift expanders
> in sse.md).
> For SLP it currently gives up more often than for loop vectorization,
> I assume we could handle all dt[1] == vect_constant_def
> and dt[2] == vect_external_def cases for SLP (and at least the former
> even if the constants differ between nodes) by building the vectors by hand,
> though the current vect_get_vec_defs/vect_get_vec_defs_for_stmt_copy can't
> be used for that as is.
>
> 2011-10-28  Jakub Jelinek  
>
>        * tree-vect-stmts.c (vectorizable_shift): If op1 is vect_external_def
>        in a loop and has different type from op0, cast it to op0's type
>        before the loop first.  For slp give up.  Don't crash if op1_vectype
>        is NULL.
>
>        * gcc.dg/vshift-3.c: New test.
>        * gcc.dg/vshift-4.c: New test.
>        * gcc.dg/vshift-5.c: New test.
>
> --- gcc/tree-vect-stmts.c.jj    2011-10-28 16:21:06.0 +0200
> +++ gcc/tree-vect-stmts.c       2011-10-31 10:27:57.0 +0100
> @@ -2446,7 +2446,10 @@ vectorizable_shift (gimple stmt, gimple_
>       optab = optab_for_tree_code (code, vectype, optab_vector);
>       if (vect_print_dump_info (REPORT_DETAILS))
>         fprintf (vect_dump, "vector/vector shift/rotate found.");
> -      if (TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
> +      if (!op1_vectype)
> +       op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
> +      if (op1_vectype == NULL_TREE
> +         || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
>        {
>          if (vect_print_dump_info (REPORT_DETAILS))
>            fprintf (vect_dump, "unusable type for last operand in"
> @@ -2480,9 +2483,28 @@ vectorizable_shift (gimple stmt, gimple_
>               /* Unlike the other binary operators, shifts/rotates have
>                  the rhs being int, instead of the same type as the lhs,
>                  so make sure the scalar is the right type if we are
> -                 dealing with vectors of short/char.  */
> +                dealing with vectors of long long/long/short/char.  */
>               if (dt[1] == vect_constant_def)
>                 op1 = fold_convert (TREE_TYPE (vectype), op1);
> +             else if (!useless_type_conversion_p (TREE_TYPE (vectype),
> +                                                  TREE_TYPE (op1)))

What happens in case dt[1] == vect_internal_def?

Thanks,
Ira

> +               {
> +                 if (slp_node
> +                     && TYPE_MODE (TREE_TYPE (vectype))
> +                        != TYPE_MODE (TREE_TYPE (op1)))
> +                   {
> +                     if (vect_print_dump_info (REPORT_DETAILS))
> +                     fprintf (vect_dump, "unusable type for last operand in"
> +                                         " vector/vector shift/rotate.");
> +                       return false;
> +                   }
> +                 if (vec_stmt && !slp_node)
> +                   {
> +                     op1 = fold_convert (TREE_TYPE (vectype), op1);
> +                     op1 = vect_init_vector (stmt, op1,
> +                                             TREE_TYPE (vectype), NULL);
> +                   }
> +               }
>             }
>         }
>     }

>
>
>        Jakub
>


Re: [PATCH] Re: vector shift regression on sparc

2011-10-31 Thread Ira Rosen
On 31 October 2011 13:23, Jakub Jelinek  wrote:
> On Mon, Oct 31, 2011 at 01:14:25PM +0200, Ira Rosen wrote:
>> > --- gcc/tree-vect-stmts.c.jj    2011-10-28 16:21:06.0 +0200
>> > +++ gcc/tree-vect-stmts.c       2011-10-31 10:27:57.0 +0100
>> > @@ -2446,7 +2446,10 @@ vectorizable_shift (gimple stmt, gimple_
>> >       optab = optab_for_tree_code (code, vectype, optab_vector);
>> >       if (vect_print_dump_info (REPORT_DETAILS))
>> >         fprintf (vect_dump, "vector/vector shift/rotate found.");
>> > -      if (TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
>> > +      if (!op1_vectype)
>> > +       op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), 
>> > vectype_out);
>> > +      if (op1_vectype == NULL_TREE
>> > +         || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
>> >        {
>> >          if (vect_print_dump_info (REPORT_DETAILS))
>> >            fprintf (vect_dump, "unusable type for last operand in"
>> > @@ -2480,9 +2483,28 @@ vectorizable_shift (gimple stmt, gimple_
>> >               /* Unlike the other binary operators, shifts/rotates have
>> >                  the rhs being int, instead of the same type as the lhs,
>> >                  so make sure the scalar is the right type if we are
>> > -                 dealing with vectors of short/char.  */
>> > +                dealing with vectors of long long/long/short/char.  */
>> >               if (dt[1] == vect_constant_def)
>> >                 op1 = fold_convert (TREE_TYPE (vectype), op1);
>> > +             else if (!useless_type_conversion_p (TREE_TYPE (vectype),
>> > +                                                  TREE_TYPE (op1)))
>>
>> What happens in case dt[1] == vect_internal_def?
>
> For !slp_node we can't reach this with dt1[1] == vect_internal_def,
> because of:
>  if (dt[1] == vect_internal_def && !slp_node)
>    scalar_shift_arg = false;
> And for slp_node I'm just giving up if type modes don't match:
>
>> > +               {
>> > +                 if (slp_node
>> > +                     && TYPE_MODE (TREE_TYPE (vectype))
>> > +                        != TYPE_MODE (TREE_TYPE (op1)))
>> > +                   {
>> > +                     if (vect_print_dump_info (REPORT_DETAILS))
>> > +                     fprintf (vect_dump, "unusable type for last operand 
>> > in"
>> > +                                         " vector/vector shift/rotate.");
>> > +                       return false;
>> > +                   }
>

Ah, OK.

> BTW, even the pre-existing if (dt[1] == vect_constant_def) doesn't seem to
> be 100% correct for slp_node != NULL, I think vect_get_constant_vectors
> will in that case create a VECTOR_CST with the desirable vector type
> (same type mode as op0's vector type mode), but the constants in the
> VECTOR_CST will have a wrong type (say V4DImode VECTOR_CST with
> SImode constants in its constructor).  The expander doesn't ICE on it
> though.

Right. As you wrote before, we should probably change shift vectors
creation for SLP.

The patch is OK.

Thanks,
Ira

>
>        Jakub
>


[patch] Update gcc.dg/vect/no-scevccp-outer-6-global.c

2011-11-01 Thread Ira Rosen
Hi,

With the recent patches for __restrict__, the outer loop in
gcc.dg/vect/no-scevccp-outer-6-global.c is now vectorizable, because
it doesn't require loop versioning for alias anymore.  The comment in
the test is probably obsolete, and checking for widen-mult doesn't
make much sense, because there is no multiplication here at all.

Tested on powerpc64-suse-linux.
Committed.

Ira

testsuite/ChangeLog:

* gcc.dg/vect/no-scevccp-outer-6-global.c: Expect to vectorize
the outer loop.  Remove comment.  Don't check for widen-mult.

Index: testsuite/gcc.dg/vect/no-scevccp-outer-6-global.c
===
--- testsuite/gcc.dg/vect/no-scevccp-outer-6-global.c   (revision 180733)
+++ testsuite/gcc.dg/vect/no-scevccp-outer-6-global.c   (working copy)
@@ -52,7 +52,5 @@
   return 0;
 }

-/* "Too many BBs in loop"  */
-/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED." 1
"vect" { xfail *-*-* } } } */
-/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern:
detected" 1 "vect" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED." 1
"vect" { xfail vect_no_align } } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */


Re: [patch] Rewrite SLP analysis towards support of operations with any number of operands

2011-11-04 Thread Ira Rosen
On 4 November 2011 09:50, Jakub Jelinek  wrote:
> The Changelog entry doesn't mention tree-vect-data-refs.c at all, yet
> the patch modifies it and the commit too.
> To me it looks like you've reverted the tree-vect-data-refs.c part
> of your PR50730
> http://gcc.gnu.org/viewcvs?root=gcc&view=rev&rev=180367
> change and as can be seen on gcc-testresults, the testcase now fails
> FAIL: gcc.dg/vect/no-tree-sra-bb-slp-pr50730.c scan-tree-dump-times slp "not 
> vectorized: more than one data ref in stmt" 0
>
> Was it intentional?

No, I don't understand how it happened. Thanks for noticing!
I'll unrevert it after testing on powerpc64-suse-linux.

Thanks,
Ira

>
>
>        Jakub
>
Index: ChangeLog
===
--- ChangeLog   (revision 180930)
+++ ChangeLog   (working copy)
@@ -1,3 +1,12 @@
+2011-11-04  Ira Rosen  
+
+   Unrevert:
+   2011-10-24  Ira Rosen  
+
+   PR tree-optimization/50730
+   * tree-vect-data-refs.c (vect_analyze_data_refs): Stop basic block
+   analysis if encountered unsupported data-ref.
+
 2011-11-04  Tristan Gingold  
 
* config/alpha/alpha.c (alpha_write_linkage): Remove fundecl
Index: tree-vect-data-refs.c
===
--- tree-vect-data-refs.c   (revision 180930)
+++ tree-vect-data-refs.c   (working copy)
@@ -2524,7 +2524,7 @@ vect_analyze_data_refs (loop_vec_info loop_vinfo,
   VEC (data_reference_p, heap) *datarefs;
   struct data_reference *dr;
   tree scalar_type;
-  bool res;
+  bool res, stop_bb_analysis = false;
 
   if (vect_print_dump_info (REPORT_DETAILS))
 fprintf (vect_dump, "=== vect_analyze_data_refs ===\n");
@@ -2586,6 +2586,12 @@ vect_analyze_data_refs (loop_vec_info loop_vinfo,
   stmt = DR_STMT (dr);
   stmt_info = vinfo_for_stmt (stmt);
 
+  if (stop_bb_analysis)
+{
+  STMT_VINFO_VECTORIZABLE (stmt_info) = false;
+  continue;
+}
+
   /* Check that analysis of the data-ref succeeded.  */
   if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
   || !DR_STEP (dr))
@@ -2596,6 +2602,13 @@ vect_analyze_data_refs (loop_vec_info loop_vinfo,
   print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
 }
 
+  if (bb_vinfo)
+{
+  STMT_VINFO_VECTORIZABLE (stmt_info) = false;
+  stop_bb_analysis = true;
+  continue;
+}
+
   return false;
 }
 
@@ -2604,7 +2617,15 @@ vect_analyze_data_refs (loop_vec_info loop_vinfo,
   if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
 fprintf (vect_dump, "not vectorized: base addr of dr is a "
  "constant");
-  return false;
+
+  if (bb_vinfo)
+{
+  STMT_VINFO_VECTORIZABLE (stmt_info) = false;
+  stop_bb_analysis = true;
+  continue;
+}
+
+   return false;
 }
 
   if (TREE_THIS_VOLATILE (DR_REF (dr)))
@@ -2614,6 +2635,14 @@ vect_analyze_data_refs (loop_vec_info loop_vinfo,
   fprintf (vect_dump, "not vectorized: volatile type ");
   print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
 }
+
+  if (bb_vinfo)
+{
+  STMT_VINFO_VECTORIZABLE (stmt_info) = false;
+  stop_bb_analysis = true;
+  continue;
+}
+
   return false;
 }
 
@@ -2629,6 +2658,14 @@ vect_analyze_data_refs (loop_vec_info loop_vinfo,
"exception ");
   print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
 }
+
+  if (bb_vinfo)
+{
+  STMT_VINFO_VECTORIZABLE (stmt_info) = false;
+  stop_bb_analysis = true;
+  continue;
+}
+
   return false;
 }
 
@@ -2746,6 +2783,14 @@ vect_analyze_data_refs (loop_vec_info loop_vinfo,
"not vectorized: more than one data ref in stmt: ");
   print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
 }
+
+  if (bb_vinfo)
+{
+  STMT_VINFO_VECTORIZABLE (stmt_info) = false;
+  stop_bb_analysis = true;
+  continue;
+}
+
   return false;
 }
 
@@ -2770,6 +2815,7 @@ vect_analyze_data_refs (loop_vec_info loop_vinfo,
 {
   /* Mark the statement as not vectorizable.  */
   STMT_VINFO_VECTORIZABLE (stmt_info) = false;
+  stop_bb_analysis = true;
   continue;
 }
   else


[patch] SLP conditions

2011-11-06 Thread Ira Rosen
Hi,

This patch adds a support of conditions in SLP.
It also fixes a bug in pattern handling in SLP (we should put pattern
statements instead of original statements in the root), and allows
pattern def-stmts in SLP.

Bootstrapped on powerpc64-suse-linux and tested on
powerpc64-suse-linux and x86_64-suse-linux.
Committed.

Ira

ChangeLog:

* tree-vectorizer.h (vectorizable_condition): Add argument.
* tree-vect-loop.c (vectorizable_reduction): Fail for condition
in SLP.  Update calls to vectorizable_condition.
* tree-vect-stmts.c (vect_is_simple_cond): Add basic block info to
the arguments.  Pass it to vect_is_simple_use_1.
(vectorizable_condition): Add slp_node to the arguments.  Support
vectorization of basic blocks.  Fail for reduction in SLP.  Update
calls to vect_is_simple_cond and vect_is_simple_use.  Support SLP:
call vect_get_slp_defs to get vector operands.
(vect_analyze_stmt): Update calls to vectorizable_condition.
(vect_transform_stmt): Likewise.
* tree-vect-slp.c (vect_create_new_slp_node): Handle COND_EXPR.
(vect_get_and_check_slp_defs): Handle COND_EXPR.  Allow pattern
def stmts.
(vect_build_slp_tree): Handle COND_EXPR.
(vect_analyze_slp_instance): Push pattern statements to root node.
(vect_get_constant_vectors): Fix comments.  Handle COND_EXPR.

testsuite/ChangeLog:

* gcc.dg/vect/bb-slp-cond-1.c: New test.
* gcc.dg/vect/slp-cond-1.c: New test.
* gcc.dg/vect/slp-cond-2.c: New test.
Index: testsuite/gcc.dg/vect/bb-slp-cond-1.c
===
--- testsuite/gcc.dg/vect/bb-slp-cond-1.c   (revision 0)
+++ testsuite/gcc.dg/vect/bb-slp-cond-1.c   (revision 0)
@@ -0,0 +1,46 @@
+/* { dg-require-effective-target vect_condition } */
+
+#include "tree-vect.h"
+
+#define N 128
+
+__attribute__((noinline, noclone)) void
+foo (int *a, int stride)
+{
+  int i;
+
+  for (i = 0; i < N/stride; i++, a += stride)
+   {
+ a[0] = a[0] ? 1 : 5;
+ a[1] = a[1] ? 2 : 6;
+ a[2] = a[2] ? 3 : 7;
+ a[3] = a[3] ? 4 : 8;
+   }
+}
+
+
+int a[N];
+int main ()
+{
+  int i;
+
+  check_vect ();
+
+  for (i = 0; i < N; i++)
+a[i] = i;
+
+  foo (a, 4);
+
+  for (i = 1; i < N; i++)
+if (a[i] != i%4 + 1)
+  abort ();
+
+  if (a[0] != 5)
+abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 
"slp" { target vect_element_align } } } */
+/* { dg-final { cleanup-tree-dump "slp" } } */
+
Index: testsuite/gcc.dg/vect/slp-cond-1.c
===
--- testsuite/gcc.dg/vect/slp-cond-1.c  (revision 0)
+++ testsuite/gcc.dg/vect/slp-cond-1.c  (revision 0)
@@ -0,0 +1,126 @@
+/* { dg-require-effective-target vect_condition } */
+#include "tree-vect.h"
+
+#define N 32
+int a[N], b[N];
+int d[N], e[N];
+int k[N];
+
+__attribute__((noinline, noclone)) void
+f1 (void)
+{
+  int i;
+  for (i = 0; i < N/4; i++)
+{
+  k[4*i] = a[4*i] < b[4*i] ? 17 : 0;
+  k[4*i+1] = a[4*i+1] < b[4*i+1] ? 17 : 0;
+  k[4*i+2] = a[4*i+2] < b[4*i+2] ? 17 : 0;
+  k[4*i+3] = a[4*i+3] < b[4*i+3] ? 17 : 0;
+}
+}
+
+__attribute__((noinline, noclone)) void
+f2 (void)
+{
+  int i;
+  for (i = 0; i < N/2; ++i)
+{
+  k[2*i] = a[2*i] < b[2*i] ? 0 : 24;
+  k[2*i+1] = a[2*i+1] < b[2*i+1] ? 7 : 4;
+}
+}
+
+__attribute__((noinline, noclone)) void
+f3 (void)
+{
+  int i;
+  for (i = 0; i < N/2; ++i)
+{
+  k[2*i] = a[2*i] < b[2*i] ? 51 : 12;
+  k[2*i+1] = a[2*i+1] > b[2*i+1] ? 51 : 12;
+}
+}
+
+__attribute__((noinline, noclone)) void
+f4 (void)
+{
+  int i;
+  for (i = 0; i < N/2; ++i)
+{
+  int d0 = d[2*i], e0 = e[2*i];
+  int d1 = d[2*i+1], e1 = e[2*i+1];
+  k[2*i] = a[2*i] >= b[2*i] ? d0 : e0;
+  k[2*i+1] = a[2*i+1] >= b[2*i+1] ? d1 : e1;
+}
+}
+
+int
+main ()
+{
+  int i;
+
+  check_vect ();
+
+  for (i = 0; i < N; i++)
+{
+  switch (i % 9)
+   {
+   case 0: asm (""); a[i] = - i - 1; b[i] = i + 1; break;
+   case 1: a[i] = 0; b[i] = 0; break;
+   case 2: a[i] = i + 1; b[i] = - i - 1; break;
+   case 3: a[i] = i; b[i] = i + 7; break;
+   case 4: a[i] = i; b[i] = i; break;
+   case 5: a[i] = i + 16; b[i] = i + 3; break;
+   case 6: a[i] = - i - 5; b[i] = - i; break;
+   case 7: a[i] = - i; b[i] = - i; break;
+   case 8: a[i] = - i; b[i] = - i - 7; break;
+   }
+  d[i] = i;
+  e[i] = 2 * i;
+}
+  f1 ();
+  for (i = 0; i < N; i++)
+if (k[i] != ((i % 3) == 0 ? 17 : 0))
+  abort ();
+
+  f2 ();
+  for (i = 0; i < N; i++)
+{
+  switch (i % 9)
+{
+case 0:
+   case 6:
+ if (k[i] != ((i/9 % 2) == 0 ? 0 : 7))
+   abort ();
+ break;
+case 1:
+case 5:
+case 7:
+ if (k[i] != ((i/9 % 2) == 0 ? 4 : 24))
+abor

Re: [PATCH] SLP vectorize calls (take 2)

2011-11-07 Thread Ira Rosen
On 7 November 2011 20:35, Jakub Jelinek  wrote:
> Hi!

Hi,

>
> Here is an updated patch, which handles both modifier == NONE
> and modifier == NARROW for SLP, after all it wasn't that hard.
> Additionally it checks that the fndecls and various call flags
> match, and adds some testcases.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux,
> ok for trunk?


> @@ -1723,6 +1764,55 @@ vectorizable_call (gimple stmt, gimple_s
>          else
>            VEC_truncate (tree, vargs, 0);
>
> +         if (slp_node)
> +           {
> +             VEC (slp_void_p, heap) *vec_defs
> +               = VEC_alloc (slp_void_p, heap, nargs);
> +             VEC (tree, heap) *vec_oprnds0;
> +
> +             for (i = 0; i < nargs; i++)
> +               VEC_quick_push (tree, vargs, gimple_call_arg (stmt, i));
> +             vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
> +             vec_oprnds0
> +               = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 0);
> +
> +             /* Arguments are ready.  Create the new vector stmt.  */
> +             FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vec_oprnd0)

Was this line left by mistake?

> +             for (i = 0; VEC_iterate (tree, vec_oprnds0, i, vec_oprnd0);
> +                  i += 2)
> +               {
> +                 size_t k;
> +                 VEC_truncate (tree, vargs, 0);
> +                 for (k = 0; k < nargs; k++)
> +                   {
> +                     VEC (tree, heap) *vec_oprndsk
> +                       = (VEC (tree, heap) *)
> +                         VEC_index (slp_void_p, vec_defs, k);
> +                     VEC_quick_push (tree, vargs,
> +                                     VEC_index (tree, vec_oprndsk, i));
> +                     VEC_quick_push (tree, vargs,
> +                                     VEC_index (tree, vec_oprndsk, i + 1));
> +                   }
> +                 new_stmt = gimple_build_call_vec (fndecl, vargs);
> +                 new_temp = make_ssa_name (vec_dest, new_stmt);
> +                 gimple_call_set_lhs (new_stmt, new_temp);
> +                 vect_finish_stmt_generation (stmt, new_stmt, gsi);
> +                 mark_symbols_for_renaming (new_stmt);
> +                 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
> +                                 new_stmt);
> +               }
> +
> +             for (i = 0; i < nargs; i++)
> +               {
> +                 VEC (tree, heap) *vec_oprndsi
> +                   = (VEC (tree, heap) *)
> +                     VEC_index (slp_void_p, vec_defs, i);
> +                 VEC_free (tree, heap, vec_oprndsi);
> +               }
> +             VEC_free (slp_void_p, heap, vec_defs);
> +             continue;
> +           }
> +
>          for (i = 0; i < nargs; i++)
>            {
>              op = gimple_call_arg (stmt, i);


Could you please rearrange the tests (separate basic blocks and loops)
and make them actually test that bbs/loops were vectorized?
Also there is no need in dg-do run.

OK otherwise.

Thanks,
Ira

> --- gcc/testsuite/gcc.dg/vect/fast-math-vect-call-1.c.jj        2011-11-07 
> 15:05:36.0 +0100
> +++ gcc/testsuite/gcc.dg/vect/fast-math-vect-call-1.c   2011-11-07 
> 15:07:10.0 +0100
> @@ -0,0 +1,100 @@
> +/* { dg-do run } */
> +
> +#include "tree-vect.h"
> +
> +extern float copysignf (float, float);
> +extern float sqrtf (float);
> +extern float fabsf (float);
> +extern void abort (void);
> +float a[64], b[64], c[64], d[64];
> +
> +__attribute__((noinline, noclone)) void
> +f1 (void)
> +{
> +  a[0] = copysignf (b[0], c[0]) + 1.0f + sqrtf (d[0]);
> +  a[1] = copysignf (b[1], c[1]) + 2.0f + sqrtf (d[1]);
> +  a[2] = copysignf (b[2], c[2]) + 3.0f + sqrtf (d[2]);
> +  a[3] = copysignf (b[3], c[3]) + 4.0f + sqrtf (d[3]);
> +  a[4] = copysignf (b[4], c[4]) + 5.0f + sqrtf (d[4]);
> +  a[5] = copysignf (b[5], c[5]) + 6.0f + sqrtf (d[5]);
> +  a[6] = copysignf (b[6], c[6]) + 7.0f + sqrtf (d[6]);
> +  a[7] = copysignf (b[7], c[7]) + 8.0f + sqrtf (d[7]);
> +}
> +
> +__attribute__((noinline, noclone)) void
> +f2 (int n)
> +{
> +  int i;
> +  for (i = 0; i < n; i++)
> +    {
> +      a[4 * i + 0] = copysignf (b[4 * i + 0], c[4 * i + 0]) + 1.0f + sqrtf 
> (d[4 * i + 0]);
> +      a[4 * i + 1] = copysignf (b[4 * i + 1], c[4 * i + 1]) + 2.0f + sqrtf 
> (d[4 * i + 1]);
> +      a[4 * i + 2] = copysignf (b[4 * i + 2], c[4 * i + 2]) + 3.0f + sqrtf 
> (d[4 * i + 2]);
> +      a[4 * i + 3] = copysignf (b[4 * i + 3], c[4 * i + 3]) + 4.0f + sqrtf 
> (d[4 * i + 3]);
> +    }
> +}
> +
> +__attribute__((noinline, noclone)) void
> +f3 (int n)
> +{
> +  int i;
> +  for (i = 0; i < 2 * n; i++)
> +    {
> +      a[2 * i + 0] = copysignf (b[2 * i + 0], c[2 * i + 0]) + 1.0f + sqrtf 
> (d[2 * i + 0]);
> +      a[2 * i + 1] = copysignf (b[2 * i + 1], c[2 * i + 1]) + 2.0f + sqrtf 
> (d[2 * i + 1]);
> +    }
> +}
> +
> +__attribute__((noinline, noclone)) void
> +f4 (void)
> +{
> +  int i;
> +  for (i = 0; i < 64; i++

Re: [PATCH] SLP vectorize calls (take 2)

2011-11-08 Thread Ira Rosen
On 8 November 2011 09:22, Jakub Jelinek  wrote:

> First of all, whether copysignf, sqrtf and/or lrint are vectorized is
> very much target specific, should I guard the dg-final lines with
> { target { i?86-*-* x86_64-*-* } }
> resp.
> { target { { i?86-*-* x86_64-*-* } && !lp64 } }
> (the latter for lrint - we don't vectorize it on x86_64), or add
> vect_call_copysignf, vect_call_sqrtf, vect_call_lrint tests in *.exp?

The second option would be nicer.

>
> For the split, some fns are hybrid, so shall I split f1+f2+f3 as slp
> and f4 as loop, or is f3 (hybrid) something else?
>  What test names
> should I use?  fast-math-slp-call-*.c/fast-math-vect-call-*.c or something
> else?  From what I gather for bb slp the test should start with bb-slp-*
> (is that f1/f2 or just f1?), but then there is currently no way to
> add -ffast-math.

In fast-math-vect-call-1.c, f1 is basic block SLP, f2+f3 are loop SLP,
and f4 is regular loop vectorization.
So, f1 should be in fast-math-bb-slp-call-1.c, with

/* { dg-final { scan-tree-dump-times "basic block vectorized using
SLP" 1 "slp" } } */
/* { dg-final { cleanup-tree-dump "slp" } } */

and

# -ffast-math
set VECT_SLP_CFLAGS $SAVED_VECT_SLP_CFLAGS
lappend VECT_SLP_CFLAGS "-ffast-math"
dg-runtest [lsort [glob -nocomplain
$srcdir/$subdir/fast-math-bb-slp-*.\[cS\]]]  \
"" $VECT_SLP_CFLAGS

in  vect.exp.

The rest can simply stay in fast-math-vect-call-1.c, but to check SLP please use

/* { dg-final { scan-tree-dump-times "vectorized 1 loops" X "vect"  } } */
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" Y
"vect"  } } */
/* { dg-final { cleanup-tree-dump "vect" } } */

>
>> Also there is no need in dg-do run.
>
> You mean because it is the default?

Yes.

Thanks,
Ira

> Certainly it is useful to test
> that gcc doesn't miscompile the tests.
>
>        Jakub
>


Re: [PATCH] SLP vectorize calls (take 3)

2011-11-08 Thread Ira Rosen
On 8 November 2011 11:32, Jakub Jelinek  wrote:
> On Tue, Nov 08, 2011 at 10:03:23AM +0200, Ira Rosen wrote:
>> The second option would be nicer.
> ...
>
> Thanks.  Here is an updated patch, will bootstrap/regtest it now.
> Ok for trunk if it passes?

Yes.

Thanks,
Ira

>
> 2011-11-08  Jakub Jelinek  
>
>        * tree-vect-stmts.c (vectorizable_call): Add SLP_NODE argument.
>        Handle vectorization of SLP calls.
>        (vect_analyze_stmt): Adjust caller, add call to it for SLP too.
>        (vect_transform_stmt): Adjust vectorizable_call caller, remove
>        assertion.
>        * tree-vect-slp.c (vect_get_and_check_slp_defs): For calls start
>        with op_idx 3.
>        (vect_build_slp_tree): Allow CALL_EXPR.
>
>        * lib/target-supports.exp (check_effective_target_vect_call_sqrtf,
>        check_effective_target_vect_call_copysignf,
>        check_effective_target_vect_call_lrint): New procedures.
>        * gcc.dg/vect/vect.exp: Run fast-math-bb-slp* tests using
>        $VECT_SLP_CFLAGS with -ffast-math.
>        * gcc.dg/vect/fast-math-vect-call-1.c: New test.
>        * gcc.dg/vect/fast-math-vect-call-2.c: New test.
>        * gcc.dg/vect/fast-math-bb-slp-call-1.c: New test.
>        * gcc.dg/vect/fast-math-bb-slp-call-2.c: New test.
>
> --- gcc/tree-vect-slp.c.jj      2011-11-07 20:32:03.0 +0100
> +++ gcc/tree-vect-slp.c 2011-11-08 09:28:12.0 +0100
> @@ -202,7 +202,10 @@ vect_get_and_check_slp_defs (loop_vec_in
>     loop = LOOP_VINFO_LOOP (loop_vinfo);
>
>   if (is_gimple_call (stmt))
> -    number_of_oprnds = gimple_call_num_args (stmt);
> +    {
> +      number_of_oprnds = gimple_call_num_args (stmt);
> +      op_idx = 3;
> +    }
>   else if (is_gimple_assign (stmt))
>     {
>       number_of_oprnds = gimple_num_ops (stmt) - 1;
> @@ -558,7 +561,25 @@ vect_build_slp_tree (loop_vec_info loop_
>       ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
>
>       if (is_gimple_call (stmt))
> -       rhs_code = CALL_EXPR;
> +       {
> +         rhs_code = CALL_EXPR;
> +         if (gimple_call_internal_p (stmt)
> +             || gimple_call_tail_p (stmt)
> +             || gimple_call_noreturn_p (stmt)
> +             || !gimple_call_nothrow_p (stmt)
> +             || gimple_call_chain (stmt))
> +           {
> +             if (vect_print_dump_info (REPORT_SLP))
> +               {
> +                 fprintf (vect_dump,
> +                          "Build SLP failed: unsupported call type ");
> +                 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
> +               }
> +
> +             vect_free_oprnd_info (&oprnds_info, true);
> +             return false;
> +           }
> +       }
>       else
>        rhs_code = gimple_assign_rhs_code (stmt);
>
> @@ -653,6 +674,27 @@ vect_build_slp_tree (loop_vec_info loop_
>              vect_free_oprnd_info (&oprnds_info, true);
>              return false;
>            }
> +
> +         if (rhs_code == CALL_EXPR)
> +           {
> +             gimple first_stmt = VEC_index (gimple, stmts, 0);
> +             if (gimple_call_num_args (stmt) != nops
> +                 || !operand_equal_p (gimple_call_fn (first_stmt),
> +                                      gimple_call_fn (stmt), 0)
> +                 || gimple_call_fntype (first_stmt)
> +                    != gimple_call_fntype (stmt))
> +               {
> +                 if (vect_print_dump_info (REPORT_SLP))
> +                   {
> +                     fprintf (vect_dump,
> +                              "Build SLP failed: different calls in ");
> +                     print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
> +                   }
> +
> +                 vect_free_oprnd_info (&oprnds_info, true);
> +                 return false;
> +               }
> +           }
>        }
>
>       /* Strided store or load.  */
> @@ -786,7 +828,8 @@ vect_build_slp_tree (loop_vec_info loop_
>          /* Not memory operation.  */
>          if (TREE_CODE_CLASS (rhs_code) != tcc_binary
>              && TREE_CODE_CLASS (rhs_code) != tcc_unary
> -              && rhs_code != COND_EXPR)
> +             && rhs_code != COND_EXPR
> +             && rhs_code != CALL_EXPR)
>            {
>              if (vect_print_dump_info (REPORT_SLP))
>                {
> --- gcc/tree-vect-stmts.c.jj    2011-11-07 20:32:09.0 +0100
> +++ gcc/tree-vect-stmts.c       2011-11-08 09:28:55.0 +0100
> @@ -1521,7 +1521,8 @@ vectorizable_function (gimple call, tree
>    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
>
>  static b

[patch] Fix PR tree-optimization/51015

2011-11-08 Thread Ira Rosen
Hi,

Some of the recently added vectorizer pattern detection functions
create pattern def stmts, and set vectype for these statements. This
causes an assert failure in vect_determine_vectorization_factor, since
we only expect data-refs and pattern statements themselves to have the
vectype set. This patch updates the assert.

Bootstrapped on powerpc64-suse-linux and tested on
powerpc64-suse-linux and x86_64-suse-linux.
Committed.

Ira

ChangeLog:

PR tree-optimization/51015
* tree-vect-loop.c (vect_determine_vectorization_factor): Expect
vectype to be set for pattern def stmts.

testsuite/ChangeLog:

PR tree-optimization/51015
* gcc.dg/vect/pr51015.c: New test.

Index: tree-vect-loop.c
===
--- tree-vect-loop.c(revision 181159)
+++ tree-vect-loop.c(working copy)
@@ -342,10 +342,12 @@ vect_determine_vectorization_factor (loop_vec_info
  if (STMT_VINFO_VECTYPE (stmt_info))
{
  /* The only case when a vectype had been already set is for stmts
-that contain a dataref, or for "pattern-stmts" (stmts generated
-by the vectorizer to represent/replace a certain idiom).  */
+that contain a dataref, or for "pattern-stmts" (stmts
+generated by the vectorizer to represent/replace a certain
+idiom).  */
  gcc_assert (STMT_VINFO_DATA_REF (stmt_info)
- || is_pattern_stmt_p (stmt_info));
+ || is_pattern_stmt_p (stmt_info)
+ || pattern_def);
  vectype = STMT_VINFO_VECTYPE (stmt_info);
}
  else
Index: testsuite/gcc.dg/vect/pr51015.c
===
--- testsuite/gcc.dg/vect/pr51015.c (revision 0)
+++ testsuite/gcc.dg/vect/pr51015.c (revision 0)
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+
+typedef unsigned long long __u64;
+static __u64 ext2_max_sizes[16 - 10 + 1];
+
+void e2fsck_pass1()
+{
+ int i;
+ __u64 max_sizes;
+
+ for (i = 10; i <= 16; i++) {
+  max_sizes = 12 + (1ULL << ((i) - 2));
+  max_sizes = max_sizes + (1ULL << ((i) - 2)) * (1ULL << ((i) - 2));
+  max_sizes = max_sizes + (1ULL << ((i) - 2)) * (1ULL << ((i) - 2)) *
(1ULL <<((i) - 2));
+  ext2_max_sizes[i - 10] = max_sizes;
+ }
+}
+
+/* { dg-final { cleanup-tree-dump "vect" } } */


Re: [PATCH] Fix fallout from bool store pattern recognition (PR tree-optimization/51000)

2011-11-09 Thread Ira Rosen
On 9 November 2011 23:32, Jakub Jelinek  wrote:
> Hi!
>
> When a bool store gets a pattern stmt, we need to update
> DR_STMT (otherwise the original rather than replaced stmts
> are used e.g. for interleaving etc.).
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, testcase
> tested on powerpc64-linux, ok for trunk?

OK.

Thanks,
Ira

>
> 2011-11-09  Jakub Jelinek  
>
>        PR tree-optimization/51000
>        * tree-vect-patterns.c (vect_recog_bool_pattern): If adding
>        a pattern stmt for a bool store, adjust DR_STMT too.
>        Don't handle bool conversions to single bit precision lhs.
>        * tree-vect-stmts.c (vect_remove_stores): If next is a pattern
>        stmt, remove its related stmt and free its stmt_vinfo.
>        (free_stmt_vec_info): Free also pattern stmt's vinfo and
>        pattern def stmt's vinfo.
>        * tree-vect-loop.c (destroy_loop_vec_info): Don't try to
>        free pattern stmt's vinfo here.
>        (vect_transform_loop): When calling vect_remove_stores,
>        do gsi_next first and don't call gsi_remove.  If not strided
>        store, free stmt vinfo for gsi_stmt (si) rather than stmt.
>
>        * gcc.dg/vect/pr51000.c: New test.
>

>
>        Jakub
>


[patch] Fix PR tree-optimization/51058

2011-11-10 Thread Ira Rosen
Hi,

This patch handles CALL_EXPRs in constant/invariant operand creation in SLP.

Bootstrapped and tested on powerpc64-suse-linux.
Committed.

Ira

ChangeLog:

PR tree-optimization/51058
* tree-vect-slp.c (vect_get_constant_vectors): Handle CALL_EXPR.

testsuite/ChangeLog:

PR tree-optimization/51058
* gfortran.dg/vect/pr51058.f90: New test.

Index: tree-vect-slp.c
===
--- tree-vect-slp.c (revision 181250)
+++ tree-vect-slp.c (working copy)
@@ -2191,7 +2191,7 @@ vect_get_constant_vectors (tree op, slp_tree slp_n
   VEC (tree, heap) *voprnds = VEC_alloc (tree, heap, number_of_vectors);
   bool constant_p, is_store;
   tree neutral_op = NULL;
-  enum tree_code code = gimple_assign_rhs_code (stmt);
+  enum tree_code code = gimple_expr_code (stmt);
   gimple def_stmt;
   struct loop *loop;

@@ -2287,22 +2287,32 @@ vect_get_constant_vectors (tree op, slp_tree slp_n
 {
   if (is_store)
 op = gimple_assign_rhs1 (stmt);
-  else if (gimple_assign_rhs_code (stmt) != COND_EXPR)
-op = gimple_op (stmt, op_num + 1);
- else
+  else
{
- if (op_num == 0 || op_num == 1)
+ switch (code)
{
- tree cond = gimple_assign_rhs1 (stmt);
- op = TREE_OPERAND (cond, op_num);
+ case COND_EXPR:
+   if (op_num == 0 || op_num == 1)
+ {
+   tree cond = gimple_assign_rhs1 (stmt);
+   op = TREE_OPERAND (cond, op_num);
+ }
+   else
+ {
+   if (op_num == 2)
+ op = gimple_assign_rhs2 (stmt);
+   else
+ op = gimple_assign_rhs3 (stmt);
+ }
+   break;
+
+ case CALL_EXPR:
+   op = gimple_call_arg (stmt, op_num);
+   break;
+
+ default:
+   op = gimple_op (stmt, op_num + 1);
}
- else
-   {
- if (op_num == 2)
-   op = gimple_assign_rhs2 (stmt);
- else
-   op = gimple_assign_rhs3 (stmt);
-   }
}

   if (reduc_index != -1)
Index: testsuite/gfortran.dg/vect/pr51058.f90
===
--- testsuite/gfortran.dg/vect/pr51058.f90  (revision 0)
+++ testsuite/gfortran.dg/vect/pr51058.f90  (revision 0)
@@ -0,0 +1,19 @@
+! { dg-do compile }
+
+  SUBROUTINE MLIST(MOLsp,PBCx,PBCy,PBCz, X0)
+  IMPLICIT NONE
+  INTEGER, PARAMETER :: NM=16384
+  INTEGER :: MOLsp, i
+  REAL :: PBCx, PBCy, PBCz, boxjmp, HALf=1./2.
+  REAL :: X0(2,-2:NM)
+
+ DO i = 1 , MOLsp
+boxjmp = PBCx*INT(X0(1,i)+SIGN(HALf,X0(1,i)))
+X0(1,i) = X0(1,i) - boxjmp
+boxjmp = PBCy*INT(X0(2,i)+SIGN(HALf,X0(2,i)))
+X0(2,i) = X0(2,i) - boxjmp
+ ENDDO
+  END
+
+! { dg-final { cleanup-tree-dump "vect" } }
+


Re: [PATCH] Free memory leaks in tree-vect-slp.c

2011-11-10 Thread Ira Rosen
On 10 November 2011 21:31, Jakub Jelinek  wrote:
> Hi!
>
> This patch fixes some compiler memory leaks in SLP.
> For vect_free_oprnd_info I've removed the FREE_DEF_STMTS argument
> and am freeing the defs always, but set them to NULL when moving the vectors
> over elsewhere, because otherwise if vect_create_new_slp_node
> or vect_build_slp_tree fails after succeeding for a couple of iterations,
> we'd leak the rest or double free them.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

OK.

Thanks,
Ira

>
> 2011-11-10  Jakub Jelinek  
>
>        * tree-vect-slp.c (vect_free_slp_tree): Also free SLP_TREE_CHILDREN
>        vector.
>        (vect_create_new_slp_node): Don't allocate node before checking stmt
>        type.
>        (vect_free_oprnd_info): Remove FREE_DEF_STMTS argument, always
>        free def_stmts vectors and additionally free oprnd_info.
>        (vect_build_slp_tree): Adjust callers.  Call it even if
>        stop_recursion.  If vect_create_new_slp_node or
>        vect_build_slp_tree fails, properly handle freeing memory.
>        If it succeeded, clear def_stmts in oprnd_info.
>
> --- gcc/tree-vect-slp.c.jj      2011-11-08 23:35:12.0 +0100
> +++ gcc/tree-vect-slp.c 2011-11-10 16:17:33.583105311 +0100
> @@ -75,8 +75,9 @@ vect_free_slp_tree (slp_tree node)
>     return;
>
>   FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
> -    vect_free_slp_tree ((slp_tree)child);
> +    vect_free_slp_tree ((slp_tree) child);
>
> +  VEC_free (slp_void_p, heap, SLP_TREE_CHILDREN (node));
>   VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node));
>
>   if (SLP_TREE_VEC_STMTS (node))
> @@ -102,7 +103,7 @@ vect_free_slp_instance (slp_instance ins
>  static slp_tree
>  vect_create_new_slp_node (VEC (gimple, heap) *scalar_stmts)
>  {
> -  slp_tree node = XNEW (struct _slp_tree);
> +  slp_tree node;
>   gimple stmt = VEC_index (gimple, scalar_stmts, 0);
>   unsigned int nops;
>
> @@ -117,6 +118,7 @@ vect_create_new_slp_node (VEC (gimple, h
>   else
>     return NULL;
>
> +  node = XNEW (struct _slp_tree);
>   SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
>   SLP_TREE_VEC_STMTS (node) = NULL;
>   SLP_TREE_CHILDREN (node) = VEC_alloc (slp_void_p, heap, nops);
> @@ -152,21 +154,19 @@ vect_create_oprnd_info (int nops, int gr
>  }
>
>
> -/* Free operands info.  Free def-stmts in FREE_DEF_STMTS is true.
> -   (FREE_DEF_STMTS is true when the SLP analysis fails, and false when it
> -   succeds.  In the later case we don't need the operands info that we used 
> to
> -   check isomorphism of the stmts, but we still need the def-stmts - they are
> -   used as scalar stmts in SLP nodes.  */
> +/* Free operands info.  */
> +
>  static void
> -vect_free_oprnd_info (VEC (slp_oprnd_info, heap) **oprnds_info,
> -                      bool free_def_stmts)
> +vect_free_oprnd_info (VEC (slp_oprnd_info, heap) **oprnds_info)
>  {
>   int i;
>   slp_oprnd_info oprnd_info;
>
> -  if (free_def_stmts)
> -    FOR_EACH_VEC_ELT (slp_oprnd_info, *oprnds_info, i, oprnd_info)
> +  FOR_EACH_VEC_ELT (slp_oprnd_info, *oprnds_info, i, oprnd_info)
> +    {
>       VEC_free (gimple, heap, oprnd_info->def_stmts);
> +      XDELETE (oprnd_info);
> +    }
>
>   VEC_free (slp_oprnd_info, heap, *oprnds_info);
>  }
> @@ -502,7 +502,7 @@ vect_build_slp_tree (loop_vec_info loop_
>               print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
>             }
>
> -         vect_free_oprnd_info (&oprnds_info, true);
> +         vect_free_oprnd_info (&oprnds_info);
>           return false;
>         }
>
> @@ -516,7 +516,7 @@ vect_build_slp_tree (loop_vec_info loop_
>              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
>            }
>
> -         vect_free_oprnd_info (&oprnds_info, true);
> +         vect_free_oprnd_info (&oprnds_info);
>          return false;
>        }
>
> @@ -532,7 +532,7 @@ vect_build_slp_tree (loop_vec_info loop_
>               print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
>             }
>
> -          vect_free_oprnd_info (&oprnds_info, true);
> +         vect_free_oprnd_info (&oprnds_info);
>           return false;
>         }
>
> @@ -546,7 +546,7 @@ vect_build_slp_tree (loop_vec_info loop_
>               print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
>             }
>
> -         vect_free_oprnd_info (&oprnds_info, true);
> +         vect_free_oprnd_info (&oprnds_info);
>           return false;
>         }
>
> @@ -576,7 +576,7 @@ vect_build_slp_tree (loop_vec_info loop_
>                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
>                }
>
> -             vect_free_oprnd_info (&oprnds_info, true);
> +             vect_free_oprnd_info (&oprnds_info);
>              return false;
>            }
>        }
> @@ -611,7 +611,7 @@ vect_build_slp_tree (loop_vec_info loop_
>                    {
>                      if (vect_print_dump_info (REPORT_SLP))
>                        fprintf (vect_dump, "Build SLP failed: no optab.");
> -        

Re: [PATCH] Don't ICE on SLP calls if the same call is used in multiple SLP instances (PR tree-optimization/51058)

2011-11-11 Thread Ira Rosen
On 11 November 2011 17:32, Jakub Jelinek  wrote:
> Hi!

Hi,

>
> Removing the scalar call in vectorizable_call for SLP vectorization
> is too early, when another SLP instance refers to the same scalar call,
> we'll ICE because that stmt doesn't have bb anymore or gsi_for_stmt
> doesn't succeed for it.
>
> Fixed by postponing replacement of calls with zeroing of lhs for later
> in the SLP case.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
>
> 2011-11-11  Jakub Jelinek  
>
>        PR tree-optimization/51058
>        * tree-vect-slp.c (vect_remove_slp_scalar_calls): New function.
>        (vect_schedule_slp): Call it.
>        * tree-vect-stmts.c (vectorizable_call): If slp_node != NULL,
>        don't replace scalar calls with clearing of their lhs here.

I think it's rhs.

>
>        * gcc.dg/vect/fast-math-vect-call-1.c: Add f4 test.
>        * gfortran.fortran-torture/compile/pr51058.f90: New test.
>
> --- gcc/tree-vect-slp.c.jj      2011-11-10 18:09:12.0 +0100
> +++ gcc/tree-vect-slp.c 2011-11-11 13:18:42.157292895 +0100
> @@ -2898,6 +2898,46 @@ vect_schedule_slp_instance (slp_tree nod
>   return is_store;
>  }
>
> +/* Replace scalar calls from SLP node NODE with clearing of their lhs.

Here too.

> +   For loop vectorization this is done in vectorizable_call, but for SLP
> +   it needs to be deferred until end of vect_schedule_slp, because multiple
> +   SLP instances may refer to the same scalar stmt.  */
> +
> +static void
> +vect_remove_slp_scalar_calls (slp_tree node)
> +{

...

> --- gcc/testsuite/gfortran.fortran-torture/compile/pr51058.f90.jj       
> 2011-11-11 13:26:14.665615842 +0100
> +++ gcc/testsuite/gfortran.fortran-torture/compile/pr51058.f90  2011-11-11 
> 13:25:50.0 +0100
> @@ -0,0 +1,18 @@
> +! PR tree-optimization/51058
> +! { dg-do compile }
> +subroutine pr51058(n, u, v, w, z)
> +  double precision :: x(3,-2:16384), y(3,-2:16384), b, u, v, w, z
> +  integer :: i, n
> +  common /c/ x, y
> +  do i = 1, n
> +    b = u * int(x(1,i)) + sign(z,x(1,i))
> +    x(1,i) = x(1,i) - b
> +    y(1,i) = y(1,i) - b
> +    b = v * int(x(2,i)) + sign(z,x(2,i))
> +    x(2,i) = x(2,i) - b
> +    y(2,i) = y(2,i) - b
> +    b = w * int(x(3,i)) + sign(z,x(3,i))
> +    x(3,i) = x(3,i) - b
> +    y(3,i) = y(3,i) - b
> +  end do
> +end subroutine

Please add
! { dg-final { cleanup-tree-dump "vect" } }


OK otherwise.

Thanks,
Ira

>
>        Jakub
>


Re: [PATCH] Don't ICE on SLP calls if the same call is used in multiple SLP instances (PR tree-optimization/51058)

2011-11-11 Thread Ira Rosen
On 11 November 2011 19:06, Jakub Jelinek  wrote:
> On Fri, Nov 11, 2011 at 06:57:58PM +0200, Ira Rosen wrote:
>> On 11 November 2011 17:32, Jakub Jelinek  wrote:
>> > 2011-11-11  Jakub Jelinek  
>> >
>> >PR tree-optimization/51058
>> >* tree-vect-slp.c (vect_remove_slp_scalar_calls): New function.
>> >(vect_schedule_slp): Call it.
>> >* tree-vect-stmts.c (vectorizable_call): If slp_node != NULL,
>> >don't replace scalar calls with clearing of their lhs here.
>>
>> I think it's rhs.
>
> I think it is lhs.  The scalar call is
>  lhs = __builtin_copysign (arg1, arg2);
> etc. and we transform it to
>  lhs = 0.0;

I still think it's clearing of rhs, but this is not really important :)

On 11 November 2011 19:13, Jakub Jelinek  wrote:
> On Fri, Nov 11, 2011 at 06:06:18PM +0100, Jakub Jelinek wrote:
>> > Please add
>> > ! { dg-final { cleanup-tree-dump "vect" } }
>> >
>> > OK otherwise.
>>
>> This is not a /vect/ testcase, but fortran torture.

Ah, sorry, indeed I thought it was a vect test.

>> I guess
>> if you really want I could move it over to gfortran.dg/vect/ instead,
>> then the ! { dg-final { cleanup-tree-dump "vect" } }
>> would be indeed needed there.
>
> That would be following, incrementally tested with
> make check-gfortran RUNTESTFLAGS='--target_board=unix\{-m32,-m64\} 
> vect.exp=pr51*'
> with both unpatched and patched f951.
>
> 2011-11-11  Jakub Jelinek  
>
>        PR tree-optimization/51058
>        * tree-vect-slp.c (vect_remove_slp_scalar_calls): New function.
>        (vect_schedule_slp): Call it.
>        * tree-vect-stmts.c (vectorizable_call): If slp_node != NULL,
>        don't replace scalar calls with clearing of their lhs here.
>
>        * gcc.dg/vect/fast-math-vect-call-1.c: Add f4 test.
>        * gfortran.dg/vect/pr51058-2.f90: New test.

Looks good.

Thanks,
Ira

>
> --- gcc/tree-vect-slp.c.jj      2011-11-11 16:02:40.475359160 +0100
> +++ gcc/tree-vect-slp.c 2011-11-11 18:08:29.784708271 +0100
> @@ -2902,6 +2902,46 @@ vect_schedule_slp_instance (slp_tree nod
>   return is_store;
>  }
>
> +/* Replace scalar calls from SLP node NODE with clearing of their lhs.
> +   For loop vectorization this is done in vectorizable_call, but for SLP
> +   it needs to be deferred until end of vect_schedule_slp, because multiple
> +   SLP instances may refer to the same scalar stmt.  */
> +
> +static void
> +vect_remove_slp_scalar_calls (slp_tree node)
> +{
> +  gimple stmt, new_stmt;
> +  gimple_stmt_iterator gsi;
> +  int i;
> +  slp_void_p child;
> +  tree lhs;
> +  stmt_vec_info stmt_info;
> +
> +  if (!node)
> +    return;
> +
> +  FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
> +    vect_remove_slp_scalar_calls ((slp_tree) child);
> +
> +  FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
> +    {
> +      if (!is_gimple_call (stmt) || gimple_bb (stmt) == NULL)
> +       continue;
> +      stmt_info = vinfo_for_stmt (stmt);
> +      if (stmt_info == NULL
> +         || is_pattern_stmt_p (stmt_info)
> +         || !PURE_SLP_STMT (stmt_info))
> +       continue;
> +      lhs = gimple_call_lhs (stmt);
> +      new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
> +      set_vinfo_for_stmt (new_stmt, stmt_info);
> +      set_vinfo_for_stmt (stmt, NULL);
> +      STMT_VINFO_STMT (stmt_info) = new_stmt;
> +      gsi = gsi_for_stmt (stmt);
> +      gsi_replace (&gsi, new_stmt, false);
> +      SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
> +    }
> +}
>
>  /* Generate vector code for all SLP instances in the loop/basic block.  */
>
> @@ -2941,6 +2981,8 @@ vect_schedule_slp (loop_vec_info loop_vi
>       unsigned int j;
>       gimple_stmt_iterator gsi;
>
> +      vect_remove_slp_scalar_calls (root);
> +
>       for (j = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (root), j, store)
>                   && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
>         {
> --- gcc/tree-vect-stmts.c.jj    2011-11-11 16:02:28.343433924 +0100
> +++ gcc/tree-vect-stmts.c       2011-11-11 18:08:29.786708241 +0100
> @@ -1886,6 +1886,9 @@ vectorizable_call (gimple stmt, gimple_s
>      it defines is mapped to the new definition.  So just replace
>      rhs of the statement with something harmless.  */
>
> +  if (slp_node)
> +    return true;
> +
>   type = TREE_TYPE (scalar_dest);
>   if (is_pattern_stmt_p (stmt_info))
>     lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
> @@ -1893,8 +1896,7 

Re: [PATCH][2/2][RFC] Fix PR49806, promote/demote binary operations in VRP

2011-08-02 Thread Ira Rosen

> +   /* Now we have matched the statement pattern
> +
> +rhs1 = (T1)x;
> +rhs2 = (T1)y;
> +op_result = rhs1 OP rhs2;
> +lhs = (T2)op_result;

Just a note that the patch I proposed for the vectorizer (
http://gcc.gnu.org/ml/gcc-patches/2011-07/msg01472.html) also handles
constants, multiple statements (i.e., op_result doesn't have to be promoted
itself, but the sequence needs to end up with a promotion), and also it may
use an intermediate type for OP. The tests in my patch don't match the
pattern this patch detects.

Thanks,
Ira



Re: [PATCH][2/2][RFC] Fix PR49806, promote/demote binary operations in VRP

2011-08-02 Thread Ira Rosen


Richard Guenther  wrote on 02/08/2011 01:33:49 PM:
>
> On Tue, 2 Aug 2011, Ira Rosen wrote:
>
> >
> > > +   /* Now we have matched the statement pattern
> > > +
> > > +rhs1 = (T1)x;
> > > +rhs2 = (T1)y;
> > > +op_result = rhs1 OP rhs2;
> > > +lhs = (T2)op_result;
> >
> > Just a note that the patch I proposed for the vectorizer (
> > http://gcc.gnu.org/ml/gcc-patches/2011-07/msg01472.html) also handles
> > constants, multiple statements (i.e., op_result doesn't have to be
promoted
> > itself, but the sequence needs to end up with a promotion), and also it
may
> > use an intermediate type for OP. The tests in my patch don't match the
> > pattern this patch detects.
>
> Ok, I only looked at the description of your patch, not the patch itself.
>
> The patch already handles constant 2nd operands.
>
> It shouldn't be difficult to handle multiple statements here, either by
> instead of the above match only
>
> op_result = rhs1 OP rhs2;
> lhs = (T2)op_result;
>
> and thus allow iteration on the promoted/demoted operation operands
> or by collecting all defs first.
>
> Do you handle arbitrary def trees or only a linear chain as suggested
> by
>
> + S2  x_T = (TYPE) x_t;
> + S3  res0_T = op (x_T, C0);
> + S4  res1_T = op (res0_T, C1);
> + S5  ... = () res1_T;  - type demotion
>
> ?  Thus, do you handle res1_T = op (res0_T, res2_T) with a possibly
> different TYPE in its def?

Only linear chains. But it doesn't seem too complicated to only check if
res2_T is a result of a type promotion.

Thanks,
Ira

> The case of
>
> op_result = rhs1 OP CST;
> lhs = (T2)op_result;
>
> is probably always profitable to demote to
>
> rhs1' = (T2)rhs1;
> lhs = rhs1' OP (T2)CST;
>
> and "iterating" that should be simple (handling two variable
> operands will probably get a bit convoluted).
>
> Thanks,
> Richard.



Re: [PATCH][2/2][RFC] Fix PR49806, promote/demote binary operations in VRP

2011-08-02 Thread Ira Rosen


Richard Guenther  wrote on 02/08/2011 04:25:58 PM:


>
> Thinking about it it probably makes sense to keep a variant of this
> in the vectorizer - after all it has quite specific requirements on
> operand sizes while VRP would probably demote as far as possible
> (maybe taking PROMOTE_MODE into account).
>
> A quick look at your patch reveals
>
> +  if (gimple_assign_rhs_code (use_stmt) == CONVERT_EXPR)
>
> CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt))
>
> +  tmp = create_tmp_var (use_type, NULL);
>
> create_tmp_reg

Why? USE_TYPE is neither COMPLEX_TYPE nor VECTOR_TYPE.

Thanks,
Ira


>
> +  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
> +  || !types_compatible_p (TREE_TYPE (oprnd1), type)
> +  || (TREE_CODE (oprnd0) != INTEGER_CST
> +  && TREE_CODE (oprnd1) != INTEGER_CST))
>
> it's always the second operand that is constant, you can simplify
> the code to not handle CST op SSA.
>
> +  code = gimple_assign_rhs_code (stmt);
> +  if (code != LSHIFT_EXPR && code != RSHIFT_EXPR
> +  && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code !=
> BIT_AND_EXPR)
> +return false;
> +
> +  oprnd0 = gimple_assign_rhs1 (stmt);
> +  oprnd1 = gimple_assign_rhs2 (stmt);
> +  type = gimple_expr_type (stmt);
> +  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
> +  || !types_compatible_p (TREE_TYPE (oprnd1), type)
>
> for shifts the type compatibility check of oprnd1 isn't guaranteed
> (but do we care?  we only will handle constant shift amounts), for
> the other operands of the codes you handle they always return true.
>
> So I'd simplify the check to
>
>   if (TREE_CODE (oprnd0) != SSA_NAME
>   || TREE_CODE (oprnd1) != INTEGER_CST)
> return false;
>
> Otherwise the patch looks sensible.
>
> Richard.



Re: [patch] Reduce over-promotion of vector operations

2011-08-04 Thread Ira Rosen
On 19 July 2011 09:44, Ira Rosen  wrote:
> Hi,
>
> This patch tries to reduce over-promotion of vector operations that
> could be done with narrower elements, e.g., for
>
> char a;
> int b, c;
> short d;
>
> b = (int) a;
> c = b << 2;
> d = (short) c;
>
> we currently produce six vec_unpack_lo/hi_expr statements for
> char->int conversion and then two vec_pack_trunc_expr for short->int.
> While the shift can be performed on short, using only two
> vec_unpack_lo/hi_expr operations for char->short conversion in this
> example.
>
> With this patch we detect such over-promoted sequences that start with
> a type promotion operation and end with a type demotion operation. The
> statements in between are checked if they can be performed using
> smaller type (this patch only adds a support for shifts and bit
> operations with a constant). If a sequence is detected we create a
> sequence of scalar pattern statements to be vectorized instead the
> original one.  Since there may be two pattern statements created for
> the same original statement - the operation itself (on an intermediate
> type) and a type promotion (from a smaller type to the intermediate
> type) for the non-constant operand - this patch adds a new field to
> struct _stmt_vec_info to keep that pattern def statement.
>
> Bootstrapped and tested on powerpc64-suse-linux.
> Comments are welcome.

I committed the attached version which incorporates Richard's comments
from here http://gcc.gnu.org/ml/gcc-patches/2011-08/msg00144.html.

Thanks,
Ira

>
> Thanks,
> Ira
>
> ChangeLog:
>
>   * tree-vectorizer.h (struct _stmt_vec_info): Add new field for
>   pattern def statement, and its access macro.
>   (NUM_PATTERNS): Set to 5.
>   * tree-vect-loop.c (vect_determine_vectorization_factor): Handle
>   pattern def statement.
>   (vect_transform_loop): Likewise.
>   * tree-vect-patterns.c (vect_vect_recog_func_ptrs): Add new
>   function vect_recog_over_widening_pattern ().
>   (vect_operation_fits_smaller_type): New function.
>   (vect_recog_over_widening_pattern, vect_mark_pattern_stmts):
>   Likewise.
>   (vect_pattern_recog_1): Move the code that marks pattern
>   statements to vect_mark_pattern_stmts (), and call it.  Update
>   documentation.
>   * tree-vect-stmts.c (vect_supportable_shift): New function.
>   (vect_analyze_stmt): Handle pattern def statement.
>   (new_stmt_vec_info): Initialize pattern def statement.
>
> testsuite/ChangeLog:
>
>   * gcc.dg/vect/vect-over-widen-1.c: New test.
>   * gcc.dg/vect/vect-over-widen-2.c: New test.
>   * gcc.dg/vect/vect-over-widen-3.c: New test.
>   * gcc.dg/vect/vect-over-widen-4.c: New test.
>
Index: ChangeLog
===
--- ChangeLog   (revision 177408)
+++ ChangeLog   (working copy)
@@ -1,3 +1,23 @@
+2011-08-04  Ira Rosen  
+
+   * tree-vectorizer.h (struct _stmt_vec_info): Add new field for
+   pattern def statement, and its access macro.
+   (NUM_PATTERNS): Set to 5.
+   * tree-vect-loop.c (vect_determine_vectorization_factor): Handle
+   pattern def statement.
+   (vect_transform_loop): Likewise.
+   * tree-vect-patterns.c (vect_vect_recog_func_ptrs): Add new
+   function vect_recog_over_widening_pattern ().
+   (vect_operation_fits_smaller_type): New function.
+   (vect_recog_over_widening_pattern, vect_mark_pattern_stmts):
+   Likewise.
+   (vect_pattern_recog_1): Move the code that marks pattern
+   statements to vect_mark_pattern_stmts (), and call it.  Update
+   documentation.
+   * tree-vect-stmts.c (vect_supportable_shift): New function.
+   (vect_analyze_stmt): Handle pattern def statement.
+   (new_stmt_vec_info): Initialize pattern def statement.
+
 2011-08-04  Richard Henderson  
 
PR target/49964
Index: testsuite/gcc.dg/vect/vect-over-widen-1.c
===
--- testsuite/gcc.dg/vect/vect-over-widen-1.c   (revision 0)
+++ testsuite/gcc.dg/vect/vect-over-widen-1.c   (revision 0)
@@ -0,0 +1,64 @@
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_shift } */
+
+#include 
+#include 
+#include "tree-vect.h"
+
+#define N 64
+
+/* Modified rgb to rgb conversion from FFmpeg.  */
+__attribute__ ((noinline)) void
+foo (unsigned char *src, unsigned char *dst)
+{
+  unsigned char *s = src;
+  unsigned short *d = (unsigned short *)dst;
+  int i;
+
+  for (i = 0; i < N/4; i++)
+{
+  const int b = *s++;
+  const int g = *s++;
+  const int r = *s++;
+  const int a = *s++;
+  *d = ((b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8) | (a>>5));
+  d++;
+}
+
+  s = src;
+  d = (unsigned short *)dst;
+  for (i = 0

[patch, vectorizer] Fix a bug in creation of reduction epilogue

2011-08-05 Thread Ira Rosen

Hi,

In case of multiple loop exit phis in vectorization of reduction we reduce
them to one vector. The result of this reduction is later ignored in case
we do the final value extraction with scalar code. This causes wrong code
generation for gfortran.dg/forall_7.f90 with -O3 -funroll-loops on Cell
SPU. This patch fixes this.

Bootstrapped on powerpc64-suse-linux and tested on powerpc64-suse-linux and
on spu-redhat-linux.
Committed.

Ira

ChangeLog:

* tree-vect-loop.c (vect_create_epilog_for_reduction): Use the
result of multiple results reduction when extracting the final
value using scalar code.

Index: tree-vect-loop.c
===
--- tree-vect-loop.c(revision 177266)
+++ tree-vect-loop.c(working copy)
@@ -3683,13 +3683,13 @@ vect_create_epilog_for_reduction (VEC (tree, heap)
 {
   tree first_vect = PHI_RESULT (VEC_index (gimple, new_phis, 0));
   tree tmp;
+  gimple new_vec_stmt = NULL;

   vec_dest = vect_create_destination_var (scalar_dest, vectype);
   for (k = 1; k < VEC_length (gimple, new_phis); k++)
 {
   gimple next_phi = VEC_index (gimple, new_phis, k);
   tree second_vect = PHI_RESULT (next_phi);
-  gimple new_vec_stmt;

   tmp = build2 (code, vectype,  first_vect, second_vect);
   new_vec_stmt = gimple_build_assign (vec_dest, tmp);
@@ -3699,6 +3699,11 @@ vect_create_epilog_for_reduction (VEC (tree, heap)
 }

   new_phi_result = first_vect;
+  if (new_vec_stmt)
+{
+  VEC_truncate (gimple, new_phis, 0);
+  VEC_safe_push (gimple, heap, new_phis, new_vec_stmt);
+}
 }
   else
 new_phi_result = PHI_RESULT (VEC_index (gimple, new_phis, 0));
@@ -3809,7 +3814,10 @@ vect_create_epilog_for_reduction (VEC (tree, heap)
   vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
   FOR_EACH_VEC_ELT (gimple, new_phis, i, new_phi)
 {
-  vec_temp = PHI_RESULT (new_phi);
+  if (gimple_code (new_phi) == GIMPLE_PHI)
+vec_temp = PHI_RESULT (new_phi);
+  else
+vec_temp = gimple_assign_lhs (new_phi);
   rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
 bitsize_zero_node);
   epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);



[patch] Fix PR tree-optimization/50014

2011-08-08 Thread Ira Rosen
Hi,

In vectorization of reduction we use a dummy def_type when getting a
copy of a vector operand. Therefore, instead of just using a constant
operand from a previous copy, we try to create a copy of it, causing
the failure. This patch adds a call to vect_is_simple_use () to get a
correct def_type.

Bootstrapped and tested on powerpc64-suse-linux.
Committed.

Ira

ChangeLog:

PR tree-optimization/50014
* tree-vect-loop.c (vectorizable_reduction): Get def type before
calling vect_get_vec_def_for_stmt_copy ().

testsuite/ChangeLog:

PR tree-optimization/50014
* gcc.dg/vect/pr50014.c: New test.

Index: tree-vect-loop.c
===
--- tree-vect-loop.c(revision 177580)
+++ tree-vect-loop.c(working copy)
@@ -4318,7 +4318,7 @@ vectorizable_reduction (gimple stmt, gimple_stmt_i
   VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL, *vect_defs = NULL;
   VEC (gimple, heap) *phis = NULL;
   int vec_num;
-  tree def0, def1, tem;
+  tree def0, def1, tem, op0, op1 = NULL_TREE;

   /* In case of reduction chain we switch to the first stmt in the chain, but
  we don't update STMT_INFO, since only the last stmt is marked as reduction
@@ -4775,8 +4775,6 @@ vectorizable_reduction (gimple stmt, gimple_stmt_i
   /* Handle uses.  */
   if (j == 0)
 {
-  tree op0, op1 = NULL_TREE;
-
   op0 = ops[!reduc_index];
   if (op_type == ternary_op)
 {
@@ -4806,11 +4804,19 @@ vectorizable_reduction (gimple stmt, gimple_stmt_i
 {
   if (!slp_node)
 {
-  enum vect_def_type dt = vect_unknown_def_type; /* Dummy */
-  loop_vec_def0 = vect_get_vec_def_for_stmt_copy (dt,
loop_vec_def0);
+  enum vect_def_type dt;
+  gimple dummy_stmt;
+  tree dummy;
+
+  vect_is_simple_use (ops[!reduc_index], loop_vinfo, NULL,
+  &dummy_stmt, &dummy, &dt);
+  loop_vec_def0 = vect_get_vec_def_for_stmt_copy (dt,
+  loop_vec_def0);
   VEC_replace (tree, vec_oprnds0, 0, loop_vec_def0);
   if (op_type == ternary_op)
 {
+  vect_is_simple_use (op1, loop_vinfo, NULL, &dummy_stmt,
+  &dummy, &dt);
   loop_vec_def1 = vect_get_vec_def_for_stmt_copy (dt,
 loop_vec_def1);
   VEC_replace (tree, vec_oprnds1, 0, loop_vec_def1);
Index: testsuite/gcc.dg/vect/pr50014.c
===
--- testsuite/gcc.dg/vect/pr50014.c (revision 0)
+++ testsuite/gcc.dg/vect/pr50014.c (revision 0)
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_int } */
+
+int f(unsigned char *s, int n)
+{
+  int sum = 0;
+  int i;
+
+  for (i = 0; i < n; i++)
+sum += 256 * s[i];
+
+  return sum;
+}
+
+/* { dg-final { cleanup-tree-dump "vect" } } */
+


[patch] Fix PR tree-optimization/50039

2011-08-10 Thread Ira Rosen
Hi,

This patch adds a check in vect_operation_fits_smaller_type ()  that a
widening statement has a stmt_vec_info, i.e., that it is a loop
statement.

Bootstrapped and tested on powerpc64-suse-linux.
Committed.

Ira

ChangeLog:

PR tree-optimization/50039
* tree-vect-patterns.c (vect_operation_fits_smaller_type): Check
that DEF_STMT has a stmt_vec_info.

testsuite/ChangeLog:

PR tree-optimization/50039
* gcc.dg/vect/vect.exp: Run no-tree-fre-* tests with -fno-tree-fre.
* gcc.dg/vect/no-tree-fre-pr50039.c: New test.

Index: testsuite/gcc.dg/vect/vect.exp
===
--- testsuite/gcc.dg/vect/vect.exp  (revision 177646)
+++ testsuite/gcc.dg/vect/vect.exp  (working copy)
@@ -257,6 +257,12 @@ lappend VECT_SLP_CFLAGS "-fno-tree-reassoc"
 dg-runtest [lsort [glob -nocomplain
$srcdir/$subdir/no-tree-reassoc-bb-slp-*.\[cS\]]]  \
 "" $VECT_SLP_CFLAGS

+# -fno-tree-fre
+set DEFAULT_VECTCFLAGS $SAVED_DEFAULT_VECTCFLAGS
+lappend DEFAULT_VECTCFLAGS "-fno-tree-fre"
+dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/no-tree-fre-*.\[cS\]]]  \
+"" $DEFAULT_VECTCFLAGS
+
 # Clean up.
 set dg-do-what-default ${save-dg-do-what-default}

Index: testsuite/gcc.dg/vect/no-tree-fre-pr50039.c
===
--- testsuite/gcc.dg/vect/no-tree-fre-pr50039.c (revision 0)
+++ testsuite/gcc.dg/vect/no-tree-fre-pr50039.c (revision 0)
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+
+extern unsigned char g_5;
+extern int g_31, g_76;
+int main(void) {
+ int i, j;
+for (j=0; j < 2; ++j) {
+g_31 = -3;
+for (i=0; i < 2; ++i)
+  g_76 = (g_31 ? g_31+1 : 0) ^ g_5;
+}
+}
+
+/* { dg-final { cleanup-tree-dump "vect" } } */
+
Index: tree-vect-patterns.c
===
--- tree-vect-patterns.c(revision 177646)
+++ tree-vect-patterns.c(working copy)
@@ -897,7 +897,8 @@ vect_operation_fits_smaller_type (gimple stmt, tre
   else
 {
   first = true;
-  if (!widened_name_p (oprnd, stmt, &half_type, &def_stmt, false))
+  if (!widened_name_p (oprnd, stmt, &half_type, &def_stmt, false)
+  || !vinfo_for_stmt (def_stmt))
 return false;
 }


[patch, ARM] Change default vector size to 128 bits - take 3

2011-08-16 Thread Ira Rosen
Hi,

This patch changes the default vector size for auto-vectorization on
ARM NEON to 128 bits. This new version is a result of a discussion
with Richard and Ramana.

wwwdocs changes will follow shortly.

Bootstrapped and tested on arm-linux-gnueabi. The testsuite changes
were also checked on powerpc64-suse-linux and x86_64-suse-linux.

There is one new failure:
gcc.c-torture/execute/mode-dependent-address.c fails with -O3
-funroll-loops with this patch or with -mvectorize-with-neon-quad.
Ramana has a patch to fix this
http://gcc.gnu.org/ml/gcc/2011-08/msg00284.html. I will wait with
committing my patch until this issue is resolved.

OK for mainline?

Thanks,
Ira

ChangeLog:

   * config/arm/arm.c (arm_preferred_simd_mode): Check
   TARGET_NEON_VECTORIZE_DOUBLE instead of
   TARGET_NEON_VECTORIZE_QUAD.
   (arm_expand_sync): Likewise.
   * config/arm/arm.opt (mvectorize-with-neon-quad): Make inverse
   mask of mvectorize-with-neon-double.  Add RejectNegative.
   (mvectorize-with-neon-double): New.

testsuite/ChangeLog:

   * lib/target-supports.exp (check_effective_target_vect_multiple_sizes):
   New procedure.
   (add_options_for_quad_vectors): Replace with ...
   (add_options_for_double_vectors): ... this.
   * gfortran.dg/vect/pr19049.f90: Expect more printings on targets that
    support multiple vector sizes since the vectorizer attempts to
    vectorize with both vector sizes.
   * gcc.dg/vect/no-vfa-vect-79.c,
    gcc.dg/vect/no-vfa-vect-102a.c, gcc.dg/vect/vect-outer-1a.c,
    gcc.dg/vect/vect-outer-1b.c, gcc.dg/vect/vect-outer-2b.c,
    gcc.dg/vect/vect-outer-3a.c, gcc.dg/vect/no-vfa-vect-37.c,
    gcc.dg/vect/vect-outer-3b.c, gcc.dg/vect/no-vfa-vect-101.c,
    gcc.dg/vect/no-vfa-vect-102.c, gcc.dg/vect/vect-reduc-dot-s8b.c,
    gcc.dg/vect/vect-outer-1.c, gcc.dg/vect/vect-104.c: Likewise.
   * gcc.dg/vect/vect-42.c: Run with 64 bit vectors if applicable.
   * gcc.dg/vect/vect-multitypes-6.c, gcc.dg/vect/vect-52.c,
   gcc.dg/vect/vect-54.c, gcc.dg/vect/vect-46.c, gcc.dg/vect/vect-48.c,
   gcc.dg/vect/vect-96.c, gcc.dg/vect/vect-multitypes-3.c,
   gcc.dg/vect/vect-40.c: Likewise.
  * gcc.dg/vect/vect-outer-5.c: Remove quad-vectors option as
   redundant.
  * gcc.dg/vect/vect-109.c, gcc.dg/vect/vect-peel-1.c,
   gcc.dg/vect/vect-peel-2.c, gcc.dg/vect/slp-25.c,
   gcc.dg/vect/vect-multitypes-1.c, gcc.dg/vect/slp-3.c,
   gcc.dg/vect/no-vfa-pr29145.c, gcc.dg/vect/vect-multitypes-4.c:
   Likewise.
 * gcc.dg/vect/vect-peel-4.c: Make ia global.

Index: arm.c
===
--- arm.c   (revision 177426)
+++ arm.c   (working copy)
@@ -22767,15 +22767,15 @@ arm_preferred_simd_mode (enum machine_mode mode)
 switch (mode)
   {
   case SFmode:
-   return TARGET_NEON_VECTORIZE_QUAD ? V4SFmode : V2SFmode;
+return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
   case SImode:
-   return TARGET_NEON_VECTORIZE_QUAD ? V4SImode : V2SImode;
+return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
   case HImode:
-   return TARGET_NEON_VECTORIZE_QUAD ? V8HImode : V4HImode;
+return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
   case QImode:
-   return TARGET_NEON_VECTORIZE_QUAD ? V16QImode : V8QImode;
+return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
   case DImode:
-   if (TARGET_NEON_VECTORIZE_QUAD)
+if (!TARGET_NEON_VECTORIZE_DOUBLE)
  return V2DImode;
break;

@@ -23998,7 +23998,7 @@ arm_expand_sync (enum machine_mode mode,
 static unsigned int
 arm_autovectorize_vector_sizes (void)
 {
-  return TARGET_NEON_VECTORIZE_QUAD ? 16 | 8 : 0;
+  return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : 16 | 8;
 }

 static bool
Index: arm.opt
===
--- arm.opt (revision 177426)
+++ arm.opt (working copy)
@@ -238,9 +238,13 @@ Target Report RejectNegative Mask(LITTLE_WORDS)
 Assume big endian bytes, little endian words.  This option is deprecated.

 mvectorize-with-neon-quad
-Target Report Mask(NEON_VECTORIZE_QUAD)
+Target Report RejectNegative InverseMask(NEON_VECTORIZE_DOUBLE)
 Use Neon quad-word (rather than double-word) registers for vectorization

+mvectorize-with-neon-double
+Target Report RejectNegative Mask(NEON_VECTORIZE_DOUBLE)
+Use Neon double-word (rather than quad-word) registers for vectorization
+
 mword-relocations
 Target Report Var(target_word_relocations)
Init(TARGET_DEFAULT_WORD_RELOCATIONS)
 Only generate absolute relocations on word sized values.
Index: testsuite/lib/target-supports.exp
===
--- testsuite/lib/target-supports.exp   (revision 177426)
+++ testsuite/lib/target-supports.exp   (working copy)
@@ -3362,6 +3362,24 @@ foreach N {2 3 4 8} {
 }]
 }
 
+# Return 1 if the target supports multiple vector sizes
+
+proc check_effective_target_vect_multiple_sizes { } {
+global et_vect_multiple_size

[wwwdocs][patch] Document the change of default vector size for ARM NEON

2011-08-16 Thread Ira Rosen
Hi,

The first part was already reviewed several months ago, but I am
resubmitting it along with -mvectorize-with-neon-double documentation.

OK to commit?

Thanks,
Ira

 * htdocs/gcc-4.7/changes.html (targets): Document ARM NEON default
 vector size change and -mvectorize-with-neon-double option.

Index: htdocs/gcc-4.7/changes.html
===
RCS file: /cvs/gcc/wwwdocs/htdocs/gcc-4.7/changes.html,v
retrieving revision 1.26
diff -r1.26 changes.html
296a297,306
> ARM
> 
> The default vector size in auto-vectorization for NEON is now 128 
> bits.
>   If vectorization fails thusly, the vectorizer tries again with
>   64-bit vectors.
> A new option -mvectorize-with-neon-double was added to
>   allow users to change the vector size to 64 bits.
>
>   
>


Re: [PATCH] Fix ICEs in vect_finish_stmt_generation (PR tree-optimization/50133)

2011-08-30 Thread Ira Rosen


Jakub Jelinek  wrote on 22/08/2011 05:22:59 PM:

> Hi!
>
> The following testcase ICEs, because gsi_end_p (*gsi) and thus
> there is no stmt after it from which to copy over the location.
> As can be seen in the PR, we could do ugly hacks to retrieve locus
> from previous stmt (non-debug of course) instead, but I'm probably
missing
> something obvious why we shouldn't take location from stmt itself
> instead.

We insert vector statement before GSI, which may be different from location
of STMT.

Ira

> We've been doing that before, just PR37482 patch changed that
> without an explanation.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
>
> 2011-08-22  Jakub Jelinek  
>
>PR tree-optimization/50133
>* tree-vect-stmts.c (vect_finish_stmt_generation): Copy location
>from stmt instead of some statement around gsi.
>
>* gcc.dg/pr50133.c: New test.
>
> --- gcc/tree-vect-stmts.c.jj   2011-08-22 08:17:08.0 +0200
> +++ gcc/tree-vect-stmts.c   2011-08-22 11:26:27.0 +0200
> @@ -1419,7 +1419,6 @@ vect_finish_stmt_generation (gimple stmt
>stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
>loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
>bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
> -  gimple_stmt_iterator si;
>
>gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
>
> @@ -1434,13 +1433,7 @@ vect_finish_stmt_generation (gimple stmt
>print_gimple_stmt (vect_dump, vec_stmt, 0, TDF_SLIM);
>  }
>
> -  si = *gsi;
> -  if (is_gimple_debug (gsi_stmt (si)))
> -{
> -  gsi_next_nondebug (&si);
> -  gcc_assert (!gsi_end_p (si));
> -}
> -  gimple_set_location (vec_stmt, gimple_location (gsi_stmt (si)));
> +  gimple_set_location (vec_stmt, gimple_location (stmt));
>  }
>
>  /* Checks if CALL can be vectorized in type VECTYPE.  Returns
> --- gcc/testsuite/gcc.dg/pr50133.c.jj   2011-08-22 11:27:15.0
+0200
> +++ gcc/testsuite/gcc.dg/pr50133.c   2011-08-22 11:12:04.0 +0200
> @@ -0,0 +1,18 @@
> +/* PR tree-optimization/50133 */
> +/* { dg-do compile } */
> +/* { dg-options "-O -ftree-vectorize -fno-tree-loop-im" } */
> +
> +extern int A[], B[];
> +
> +void
> +foo (int z)
> +{
> +  int j, i;
> +  for (j = 0; j < 32; j++)
> +{
> +  int a = A[0];
> +  for (i = 0; i < 16; i++)
> +   a = A[i] ? a : z;
> +  B[j] = a;
> +}
> +}
>
>Jakub



Re: [PATCH] Make vectorizer dumps more comparable

2011-09-01 Thread Ira Rosen


gcc-patches-ow...@gcc.gnu.org wrote on 24/08/2011 02:20:50 PM:
>
> This avoids the file/location clutter in front of each line
> in the vectorizer dump.  While this is useful for people
> requesting -fvectorizer-verbose=N in dump files this makes
> you unable to compare dumps for testcases on a branch and trunk.
> It also makes lines excessively long because the testsuite
> filename paths are so long.  Very annoying.
>
> (I'd argue also that -fvectorizer-verbose=N dumps to the dump
> file if available and not always to stderr is bogus, but well ...)
>
> This patch has made my life a lot easier debugging the data dependence
> stuff.
>
> Bootstrapped and tested on x86_64-unknown-linux-gnu, installed on trunk.
>
> Richard.
>
> 2011-08-24  Richard Guenther  
>
>* tree-vectorizer.c (vect_print_dump_info): Avoid the
>file and location clutter when dumping to dump files.


IMO it's a bad idea. It's now impossible to find anything when compiling a
big file. How about only removing the file name?

Index: tree-vectorizer.c
===
--- tree-vectorizer.c   (revision 178374)
+++ tree-vectorizer.c   (working copy)
@@ -149,16 +149,12 @@ vect_print_dump_info (enum vect_verbosit
   if (!current_function_decl || !vect_dump)
 return false;

-  if (dump_file)
-fprintf (vect_dump, "\n");
-
-  else if (vect_location == UNKNOWN_LOC)
-fprintf (vect_dump, "\n%s:%d: note: ",
-DECL_SOURCE_FILE (current_function_decl),
+  if (vect_location == UNKNOWN_LOC)
+fprintf (vect_dump, "\nline %d: ",
 DECL_SOURCE_LINE (current_function_decl));
   else
-fprintf (vect_dump, "\n%s:%d: note: ",
-LOC_FILE (vect_location), LOC_LINE (vect_location));
+fprintf (vect_dump, "\nline %d: ",
+LOC_LINE (vect_location));

   return true;
 }

Ira

>
> Index: gcc/tree-vectorizer.c
> ===
> --- gcc/tree-vectorizer.c   (revision 178028)
> +++ gcc/tree-vectorizer.c   (working copy)
> @@ -149,7 +149,10 @@ vect_print_dump_info (enum vect_verbosit
>if (!current_function_decl || !vect_dump)
>  return false;
>
> -  if (vect_location == UNKNOWN_LOC)
> +  if (dump_file)
> +fprintf (vect_dump, "\n");
> +
> +  else if (vect_location == UNKNOWN_LOC)
>  fprintf (vect_dump, "\n%s:%d: note: ",
>  DECL_SOURCE_FILE (current_function_decl),
>  DECL_SOURCE_LINE (current_function_decl));
>



Re: [PATCH] Make vectorizer dumps more comparable

2011-09-01 Thread Ira Rosen


Richard Guenther  wrote on 01/09/2011 10:33:23 AM:

> On Thu, 1 Sep 2011, Ira Rosen wrote:
>
> >
> >
> > gcc-patches-ow...@gcc.gnu.org wrote on 24/08/2011 02:20:50 PM:
> > >
> > > This avoids the file/location clutter in front of each line
> > > in the vectorizer dump.  While this is useful for people
> > > requesting -fvectorizer-verbose=N in dump files this makes
> > > you unable to compare dumps for testcases on a branch and trunk.
> > > It also makes lines excessively long because the testsuite
> > > filename paths are so long.  Very annoying.
> > >
> > > (I'd argue also that -fvectorizer-verbose=N dumps to the dump
> > > file if available and not always to stderr is bogus, but well ...)
> > >
> > > This patch has made my life a lot easier debugging the data
dependence
> > > stuff.
> > >
> > > Bootstrapped and tested on x86_64-unknown-linux-gnu, installed on
trunk.
> > >
> > > Richard.
> > >
> > > 2011-08-24  Richard Guenther  
> > >
> > >* tree-vectorizer.c (vect_print_dump_info): Avoid the
> > >file and location clutter when dumping to dump files.
> >
> >
> > IMO it's a bad idea. It's now impossible to find anything when
compiling a
> > big file. How about only removing the file name?
>
> How about, as Micha suggested, print the location of the loop
> we currently investigate from vectorize_loops () where we
> call find_loop_location () instead?

The problem is that a dump of a single loop can be pretty long, and "start
to analyze loop..."/"finish to analyze loop..." may not be visible enough.
I am OK with adding these printings though (in addition to line numbers).

I understand why you didn't like to see the file location, but what's the
problem with the line number?

Ira

>
> Richard.
>
> > Index: tree-vectorizer.c
> > ===
> > --- tree-vectorizer.c   (revision 178374)
> > +++ tree-vectorizer.c   (working copy)
> > @@ -149,16 +149,12 @@ vect_print_dump_info (enum vect_verbosit
> >if (!current_function_decl || !vect_dump)
> >  return false;
> >
> > -  if (dump_file)
> > -fprintf (vect_dump, "\n");
> > -
> > -  else if (vect_location == UNKNOWN_LOC)
> > -fprintf (vect_dump, "\n%s:%d: note: ",
> > -DECL_SOURCE_FILE (current_function_decl),
> > +  if (vect_location == UNKNOWN_LOC)
> > +fprintf (vect_dump, "\nline %d: ",
> >  DECL_SOURCE_LINE (current_function_decl));
> >else
> > -fprintf (vect_dump, "\n%s:%d: note: ",
> > -LOC_FILE (vect_location), LOC_LINE (vect_location));
> > +fprintf (vect_dump, "\nline %d: ",
> > +LOC_LINE (vect_location));
> >
> >return true;
> >  }
> >
> > Ira
> >
> > >
> > > Index: gcc/tree-vectorizer.c
> > > ===
> > > --- gcc/tree-vectorizer.c   (revision 178028)
> > > +++ gcc/tree-vectorizer.c   (working copy)
> > > @@ -149,7 +149,10 @@ vect_print_dump_info (enum vect_verbosit
> > >if (!current_function_decl || !vect_dump)
> > >  return false;
> > >
> > > -  if (vect_location == UNKNOWN_LOC)
> > > +  if (dump_file)
> > > +fprintf (vect_dump, "\n");
> > > +
> > > +  else if (vect_location == UNKNOWN_LOC)
> > >  fprintf (vect_dump, "\n%s:%d: note: ",
> > >  DECL_SOURCE_FILE (current_function_decl),
> > >  DECL_SOURCE_LINE (current_function_decl));
> > >
> >
> >
>
> --
> Richard Guenther 
> SUSE / SUSE Labs
> SUSE LINUX Products GmbH - Nuernberg - AG Nuernberg - HRB 16746
> GF: Jeff Hawn, Jennifer Guild, Felix Imendörffer



[patch] Fix PR tree-optimization/50178

2011-09-01 Thread Ira Rosen
Hi,

When vectorizing a function call we replace the original call with a
dummy statement to ensure that DCE later removes it. We also remove
its stmt_vec_info, which causes the segfault when we try to access it
through the related pattern stmt. The following patch updates the
related pattern stmt to be the dummy stmt.

Bootstrapped and tested on powerpc64-suse-linux.
OK for 4.6?

Thanks,
Ira

ChangeLog:

 PR tree-optimization/50178
 * tree-vect-stmts.c (vectorizable_call): Update the related
pattern statement
 before deleting the original call.
 (vect_transform_stmt): Don't expect the related pattern statement to match
 the original statement after transformation.

testsuite/ChangeLog:

 PR tree-optimization/50178
 * gfortran.dg/vect/pr50178.f90: New test.

Index: testsuite/gfortran.dg/vect/pr50178.f90
===
--- testsuite/gfortran.dg/vect/pr50178.f90  (revision 0)
+++ testsuite/gfortran.dg/vect/pr50178.f90  (revision 0)
@@ -0,0 +1,29 @@
+! { dg-do compile }
+
+module yemdyn
+   implicit none
+   integer, parameter :: jpim = selected_int_kind(9)
+   integer, parameter :: jprb = selected_real_kind(13,300)
+   real(kind=jprb) :: elx
+   real(kind=jprb), allocatable :: xkcoef(:)
+   integer(kind=jpim),allocatable :: ncpln(:), npne(:)
+end module yemdyn
+
+subroutine suedyn
+
+   use yemdyn
+
+   implicit none
+
+   integer(kind=jpim) :: jm, jn
+   real(kind=jprb) :: zjm, zjn, zxxx
+
+   jn=0
+   do jm=0,ncpln(jn)
+  zjm=real(jm,jprb) / elx
+  xkcoef(npne(jn)+jm) = - zxxx*(zjm**2)**0.5_jprb
+   end do
+
+end subroutine suedyn
+
+! { dg-final { cleanup-tree-dump "vect" } }
Index: tree-vect-stmts.c
===
--- tree-vect-stmts.c   (revision 178373)
+++ tree-vect-stmts.c   (working copy)
@@ -1583,6 +1583,14 @@ vectorizable_call (gimple stmt, gimple_stmt_iterat
   new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
  build_zero_cst (type));
   set_vinfo_for_stmt (new_stmt, stmt_info);
+  /* For pattern statements make the related statement to point to
+ NEW_STMT in order to be able to retrieve the original statement
+ information later.  */
+  if (is_pattern_stmt_p (stmt_info))
+{
+  gimple related = STMT_VINFO_RELATED_STMT (stmt_info);
+  STMT_VINFO_RELATED_STMT (vinfo_for_stmt (related)) = new_stmt;
+}
   set_vinfo_for_stmt (stmt, NULL);
   STMT_VINFO_STMT (stmt_info) = new_stmt;
   gsi_replace (gsi, new_stmt, false);
@@ -4957,11 +4965,7 @@ vect_transform_stmt (gimple stmt, gimple_stmt_iter
 the stmt_info of ORIG_STMT_IN_PATTERN.  See more details in the
 documentation of vect_pattern_recog.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
-   {
- gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo)
-   == orig_scalar_stmt);
- STMT_VINFO_VEC_STMT (stmt_vinfo) = vec_stmt;
-   }
+   STMT_VINFO_VEC_STMT (stmt_vinfo) = vec_stmt;
}
 }


Re: [patch] Fix PR tree-optimization/50178

2011-09-01 Thread Ira Rosen
On 1 September 2011 11:16, Jakub Jelinek  wrote:
> On Thu, Sep 01, 2011 at 10:14:29AM +0200, Richard Guenther wrote:
>> On Thu, Sep 1, 2011 at 10:12 AM, Ira Rosen  wrote:
>> > When vectorizing a function call we replace the original call with a
>> > dummy statement to ensure that DCE later removes it. We also remove
>> > its stmt_vec_info, which causes the segfault when we try to access it
>> > through related pattern stmt. The following patch updates related
>> > pattern stmt to be the dummy stmt.
>> >
>> > Bootstrapped and tested on powerpc64-suse-linux.
>> > OK for 4.6?
>>
>> Ok.
>
> Please also commit the testcase into the trunk.

Tested on powerpc64-suse-linux.
Committed to trunk.

Ira

testsuite/ChangeLog:

PR tree-optimization/50178
* gfortran.dg/vect/pr50178.f90: New test.

Index: testsuite/gfortran.dg/vect/pr50178.f90
===
--- testsuite/gfortran.dg/vect/pr50178.f90  (revision 0)
+++ testsuite/gfortran.dg/vect/pr50178.f90  (revision 0)
@@ -0,0 +1,29 @@
+! { dg-do compile }
+
+module yemdyn
+   implicit none
+   integer, parameter :: jpim = selected_int_kind(9)
+   integer, parameter :: jprb = selected_real_kind(13,300)
+   real(kind=jprb) :: elx
+   real(kind=jprb), allocatable :: xkcoef(:)
+   integer(kind=jpim),allocatable :: ncpln(:), npne(:)
+end module yemdyn
+
+subroutine suedyn
+
+   use yemdyn
+
+   implicit none
+
+   integer(kind=jpim) :: jm, jn
+   real(kind=jprb) :: zjm, zjn, zxxx
+
+   jn=0
+   do jm=0,ncpln(jn)
+  zjm=real(jm,jprb) / elx
+  xkcoef(npne(jn)+jm) = - zxxx*(zjm**2)**0.5_jprb
+   end do
+
+end subroutine suedyn
+
+! { dg-final { cleanup-tree-dump "vect" } }


>
>> >     PR tree-optimization/50178
>> >     * tree-vect-stmts.c (vectorizable_call): Update the related
>> > pattern statement
>> >     before deleting the original call.
>> >     (vect_transform_stmt): Don't expect the related pattern statement 
>> > match the
>> >     original statement after transformation.
>> >
>> > testsuite/ChangeLog:
>> >
>> >     PR tree-optimization/50178
>> >     * gfortran.dg/vect/pr50178.f90: New test.
>
>        Jakub
>


Re: [PATCH] Make vectorizer dumps more comparable

2011-09-01 Thread Ira Rosen


Richard Guenther  wrote on 01/09/2011 11:13:29 AM:

> > > > IMO it's a bad idea. It's now impossible to find anything when
> > compiling a
> > > > big file. How about only removing the file name?
> > >
> > > How about, as Micha suggested, print the location of the loop
> > > we currently investigate from vectorize_loops () where we
> > > call find_loop_location () instead?
> >
> > The problem is that a dump of a single loop can be pretty long, and
"start
> > to analyze loop..."/"finish to analyze loop..." may be not visible
enough.
> > I am OK with adding these printings though (in addition to line
numbers).
> >
> > I understand why you didn't like to see the file location, but what's
the
> > problem with the line number?
>
> Well, it seems to be different what everybody else does and it's
> highly redundant for a whole bunch of lines.
>
> But, it solves my diff issue and the overly long lines as well.
>
> Your patch changes both dump-file and stderr printing though,
> I did want to preserve stderr printing.

OK.

>
> For the dump-file I'd drop the 'line ' prefix and just print '%d: '.

OK.

>
> Btw, the diagnostic machinery does _not_ print locations
> for note (""), the location information is supposed to be printed
> in the heading warning/error.  Thus, a much better format for stderr
> would be
>
> file.c:12: LOOP NOT VECTORIZED
> note: unsupported stmt ''
>
> as the further notes will be printed with the 'loop location' which
> is confusing when dumping statements

We usually print only one line, like

file.c:12: note:  

so I don't really understand this part.

Ira

>
> Richard.
>
> > Ira
> >
> > >
> > > Richard.
> > >
> > > > Index: tree-vectorizer.c
> > > > ===
> > > > --- tree-vectorizer.c   (revision 178374)
> > > > +++ tree-vectorizer.c   (working copy)
> > > > @@ -149,16 +149,12 @@ vect_print_dump_info (enum vect_verbosit
> > > >if (!current_function_decl || !vect_dump)
> > > >  return false;
> > > >
> > > > -  if (dump_file)
> > > > -fprintf (vect_dump, "\n");
> > > > -
> > > > -  else if (vect_location == UNKNOWN_LOC)
> > > > -fprintf (vect_dump, "\n%s:%d: note: ",
> > > > -DECL_SOURCE_FILE (current_function_decl),
> > > > +  if (vect_location == UNKNOWN_LOC)
> > > > +fprintf (vect_dump, "\nline %d: ",
> > > >  DECL_SOURCE_LINE (current_function_decl));
> > > >else
> > > > -fprintf (vect_dump, "\n%s:%d: note: ",
> > > > -LOC_FILE (vect_location), LOC_LINE (vect_location));
> > > > +fprintf (vect_dump, "\nline %d: ",
> > > > +LOC_LINE (vect_location));
> > > >
> > > >return true;
> > > >  }
> > > >
> > > > Ira
> > > >
> > > > >
> > > > > Index: gcc/tree-vectorizer.c
> > > > >
===
> > > > > --- gcc/tree-vectorizer.c   (revision 178028)
> > > > > +++ gcc/tree-vectorizer.c   (working copy)
> > > > > @@ -149,7 +149,10 @@ vect_print_dump_info (enum vect_verbosit
> > > > >if (!current_function_decl || !vect_dump)
> > > > >  return false;
> > > > >
> > > > > -  if (vect_location == UNKNOWN_LOC)
> > > > > +  if (dump_file)
> > > > > +fprintf (vect_dump, "\n");
> > > > > +
> > > > > +  else if (vect_location == UNKNOWN_LOC)
> > > > >  fprintf (vect_dump, "\n%s:%d: note: ",
> > > > >  DECL_SOURCE_FILE (current_function_decl),
> > > > >  DECL_SOURCE_LINE (current_function_decl));
> > > > >
> > > >
> > > >
> > >
> > > --
> > > Richard Guenther 
> > > SUSE / SUSE Labs
> > > SUSE LINUX Products GmbH - Nuernberg - AG Nuernberg - HRB 16746
> > > GF: Jeff Hawn, Jennifer Guild, Felix Imendörffer
> >
> >
>
> --
> Richard Guenther 
> SUSE / SUSE Labs
> SUSE LINUX Products GmbH - Nuernberg - AG Nuernberg - HRB 16746
> GF: Jeff Hawn, Jennifer Guild, Felix Imendörffer



Re: [PATCH] Make vectorizer dumps more comparable

2011-09-01 Thread Ira Rosen


Richard Guenther  wrote on 01/09/2011 12:26:25 PM:

> > > Well, it seems to be different what everybody else does and it's
> > > highly redundant for a whole bunch of lines.
> > >
> > > But, it solves my diff issue and the overly long lines as well.
> > >
> > > Your patch changes both dump-file and stderr printing though,
> > > I did want to preserve stderr printing.
> >
> > OK.
> >
> > >
> > > For the dump-file I'd drop the 'line ' prefix and just print '%d: '.
> >
> > OK.
> >
> > >
> > > Btw, the diagnostic machinery does _not_ print locations
> > > for note (""), the location information is supposed to be printed
> > > in the heading warning/error.  Thus, a much better format for stderr
> > > would be
> > >
> > > file.c:12: LOOP NOT VECTORIZED
> > > note: unsupported stmt ''
> > >
> > > as the further notes will be printed with the 'loop location' which
> > > is confusing when dumping statements
> >
> > We usually print only one line, like
> >
> > file.c:12: note:  
> >
> > so I don't really understand this part.
>
> It's a general note.  With -ftree-vectorizer-verbose=5 we dump a lot
> of information with the same location (that of the loop header),
> but the individual messages refer not only to the overall loop
> but to specific statements, etc.
>
> This all is of course an artifact of sharing the dump file code
> with the reporting code.

I see.

Here is the new patch, I'll commit it after testing (on
powerpc64-suse-linux) if there are no objections.

Ira


ChangeLog:

* tree-vectorizer.c (vect_print_dump_info): Print line
number when dumping to a file.
(vectorize_loops): Add new messages to dump file.


Index: tree-vectorizer.c
===
--- tree-vectorizer.c   (revision 178396)
+++ tree-vectorizer.c   (working copy)
@@ -149,16 +149,12 @@ vect_print_dump_info (enum vect_verbosity_levels v
   if (!current_function_decl || !vect_dump)
 return false;

-  if (dump_file)
-fprintf (vect_dump, "\n");
-
-  else if (vect_location == UNKNOWN_LOC)
+  if (vect_location == UNKNOWN_LOC)
 fprintf (vect_dump, "\n%s:%d: note: ",
 DECL_SOURCE_FILE (current_function_decl),
 DECL_SOURCE_LINE (current_function_decl));
   else
-fprintf (vect_dump, "\n%s:%d: note: ",
-LOC_FILE (vect_location), LOC_LINE (vect_location));
+fprintf (vect_dump, "\n%d: ", LOC_LINE (vect_location));

   return true;
 }
@@ -199,12 +195,22 @@ vectorize_loops (void)
loop_vec_info loop_vinfo;

vect_location = find_loop_location (loop);
+if (vect_location != UNKNOWN_LOC
+&& vect_verbosity_level > REPORT_NONE)
+ fprintf (vect_dump, "\nAnalyzing loop at %s:%d\n",
+LOC_FILE (vect_location), LOC_LINE (vect_location));
+
loop_vinfo = vect_analyze_loop (loop);
loop->aux = loop_vinfo;

if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
  continue;

+if (vect_location != UNKNOWN_LOC
+&& vect_verbosity_level > REPORT_NONE)
+  fprintf (vect_dump, "\n\nVectorizing loop at %s:%d\n",
+LOC_FILE (vect_location), LOC_LINE (vect_location));
+
vect_transform_loop (loop_vinfo);
num_vectorized_loops++;
   }




[patch] Fix PR tree-optimization/50208

2011-09-04 Thread Ira Rosen
Hi,

While analyzing def stmt in vectorizer pattern detection, we access
its stmt_vec_info which is initialized only for statements inside the
loop being analyzed. Hence if the def stmt is outside the loop, we get
a segfault. This patch checks that a statement is inside the loop
before accessing its stmt_vec_info.

Bootstrapped and tested on powerpc64-suse-linux.
Committed.

Ira

ChangeLog:

2011-09-04  Jakub Jelinek  
Ira Rosen  

 PR tree-optimization/50208
 * tree-vect-patterns.c (vect_handle_widen_mult_by_const):
 Add an argument.  Check that def_stmt is inside the loop.
(vect_recog_widen_mult_pattern): Update calls to
vect_handle_widen_mult_by_const.
(vect_operation_fits_smaller_type): Check that def_stmt is
inside the loop.

testsuite/ChangeLog:

2011-09-04  Jakub Jelinek  
Ira Rosen  

 PR tree-optimization/50208
 * gcc.dg/vect/no-fre-pre-pr50208.c: New test.
 * gcc.dg/vect/vect.exp: Run no-fre-pre-*.c tests with
 -fno-tree-fre -fno-tree-pre.

Index: testsuite/gcc.dg/vect/no-fre-pre-pr50208.c
===
--- testsuite/gcc.dg/vect/no-fre-pre-pr50208.c  (revision 0)
+++ testsuite/gcc.dg/vect/no-fre-pre-pr50208.c  (revision 0)
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+
+char c;
+int a, b;
+
+void foo (int j)
+{
+  int i;
+  while (--j)
+{
+  b = 3;
+  for (i = 0; i < 2; ++i)
+a = b ^ c;
+}
+}
+
+/* { dg-final { cleanup-tree-dump "vect" } } */
Index: testsuite/gcc.dg/vect/vect.exp
===
--- testsuite/gcc.dg/vect/vect.exp  (revision 178506)
+++ testsuite/gcc.dg/vect/vect.exp  (working copy)
@@ -263,6 +263,12 @@ lappend DEFAULT_VECTCFLAGS "-fno-tree-fre"
 dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/no-tree-fre-*.\[cS\]]]  \
 "" $DEFAULT_VECTCFLAGS

+# -fno-tree-fre -fno-tree-pre
+set DEFAULT_VECTCFLAGS $SAVED_DEFAULT_VECTCFLAGS
+lappend DEFAULT_VECTCFLAGS "-fno-tree-fre" "-fno-tree-pre"
+dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/no-fre-pre*.\[cS\]]]  \
+"" $DEFAULT_VECTCFLAGS
+
 # Clean up.
 set dg-do-what-default ${save-dg-do-what-default}
Index: tree-vect-patterns.c
===
--- tree-vect-patterns.c(revision 178506)
+++ tree-vect-patterns.c(working copy)
@@ -344,12 +344,14 @@ vect_recog_dot_prod_pattern (VEC (gimple, heap) **
replace a_T = (TYPE) a_t; with a_it - (interm_type) a_t;  */

 static bool
-vect_handle_widen_mult_by_const (tree const_oprnd, tree *oprnd,
+vect_handle_widen_mult_by_const (gimple stmt, tree const_oprnd, tree *oprnd,
 VEC (gimple, heap) **stmts, tree type,
 tree *half_type, gimple def_stmt)
 {
   tree new_type, new_oprnd, tmp;
   gimple new_stmt;
+  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt));
+  struct loop *loop = LOOP_VINFO_LOOP (loop_info);

   if (int_fits_type_p (const_oprnd, *half_type))
 {
@@ -359,6 +361,8 @@ static bool
 }

   if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4)
+  || !gimple_bb (def_stmt)
+  || !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
   || !vinfo_for_stmt (def_stmt))
 return false;

@@ -527,7 +531,8 @@ vect_recog_widen_mult_pattern (VEC (gimple, heap)
 {
   if (TREE_CODE (oprnd0) == INTEGER_CST
  && TREE_CODE (half_type1) == INTEGER_TYPE
-  && vect_handle_widen_mult_by_const (oprnd0, &oprnd1, stmts, type,
+  && vect_handle_widen_mult_by_const (last_stmt, oprnd0, &oprnd1,
+  stmts, type,
  &half_type1, def_stmt1))
 half_type0 = half_type1;
   else
@@ -537,7 +542,8 @@ vect_recog_widen_mult_pattern (VEC (gimple, heap)
 {
   if (TREE_CODE (oprnd1) == INTEGER_CST
   && TREE_CODE (half_type0) == INTEGER_TYPE
-  && vect_handle_widen_mult_by_const (oprnd1, &oprnd0, stmts, type,
+  && vect_handle_widen_mult_by_const (last_stmt, oprnd1, &oprnd0,
+  stmts, type,
  &half_type0, def_stmt0))
 half_type1 = half_type0;
   else
@@ -868,6 +874,8 @@ vect_operation_fits_smaller_type (gimple stmt, tre
   tree interm_type = NULL_TREE, half_type, tmp, new_oprnd, type;
   gimple def_stmt, new_stmt;
   bool first = false;
+  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt));
+  struct loop *loop = LOOP_VINFO_LOOP (loop_info);

   *new_def_stmt = NULL;

@@ -898,6 +906,8 @@ vect_operation_fits_smaller_type (gimple stmt, tre
 {
   first = true;
   if (!widened_name_p (oprn

Re: [patch, ARM] Change default vector size to 128 bits - take 3

2011-09-06 Thread Ira Rosen
On 17 August 2011 15:49, Richard Earnshaw  wrote:
> On 16/08/11 10:28, Ira Rosen wrote:
>> Hi,
>>
>> This patch changes the default vector size for auto-vectorization on
>> ARM NEON to 128 bits. This new version is a result of a discussion
>> with Richard and Ramana.
>>
>> wwwdocs changes will follow shortly.
>>
>> Bootstrapped and tested on arm-linux-gnueabi. The testsuite changes
>> were also checked on powerpc64-suse-linux and x86_64-suse-linux.
>>
>> There is one new failure:
>> gcc.c-torture/execute/mode-dependent-address.c fails with -O3
>> -funroll-loops with this patch or with -mvectorize-with-neon-quad.
>> Ramana has a patch to fix this
>> http://gcc.gnu.org/ml/gcc/2011-08/msg00284.html. I will wait with
>> committing my patch until this issue is resolved.
>>
>> OK for mainline?
>>
>> Thanks,
>> Ira
>>
>> ChangeLog:
>>
>>    * config/arm/arm.c (arm_preferred_simd_mode): Check
>>    TARGET_NEON_VECTORIZE_DOUBLE instead of
>>    TARGET_NEON_VECTORIZE_QUAD.
>>    (arm_expand_sync): Likewise.
>>    * config/arm/arm.opt (mvectorize-with-neon-quad): Make inverse
>>    mask of mvectorize-with-neon-double.  Add RejectNegative.
>>    (mvectorize-with-neon-double): New.
>>
>> testsuite/ChangeLog:
>>
>>    * lib/target-supports.exp (check_effective_target_vect_multiple_sizes):
>>    New procedure.
>>    (add_options_for_quad_vectors): Replace with ...
>>    (add_options_for_double_vectors): ... this.
>>    * gfortran.dg/vect/pr19049.f90: Expect more printings on targets that
>>     support multiple vector sizes since the vectorizer attempts to
>>     vectorize with both vector sizes.
>>    * gcc.dg/vect/no-vfa-vect-79.c,
>>     gcc.dg/vect/no-vfa-vect-102a.c, gcc.dg/vect/vect-outer-1a.c,
>>     gcc.dg/vect/vect-outer-1b.c, gcc.dg/vect/vect-outer-2b.c,
>>     gcc.dg/vect/vect-outer-3a.c, gcc.dg/vect/no-vfa-vect-37.c,
>>     gcc.dg/vect/vect-outer-3b.c, gcc.dg/vect/no-vfa-vect-101.c,
>>     gcc.dg/vect/no-vfa-vect-102.c, gcc.dg/vect/vect-reduc-dot-s8b.c,
>>     gcc.dg/vect/vect-outer-1.c, gcc.dg/vect/vect-104.c: Likewise.
>>    * gcc.dg/vect/vect-42.c: Run with 64 bit vectors if applicable.
>>    * gcc.dg/vect/vect-multitypes-6.c, gcc.dg/vect/vect-52.c,
>>    gcc.dg/vect/vect-54.c, gcc.dg/vect/vect-46.c, gcc.dg/vect/vect-48.c,
>>    gcc.dg/vect/vect-96.c, gcc.dg/vect/vect-multitypes-3.c,
>>    gcc.dg/vect/vect-40.c: Likewise.
>>   * gcc.dg/vect/vect-outer-5.c: Remove quad-vectors option as
>>    redundant.
>>   * gcc.dg/vect/vect-109.c, gcc.dg/vect/vect-peel-1.c,
>>    gcc.dg/vect/vect-peel-2.c, gcc.dg/vect/slp-25.c,
>>    gcc.dg/vect/vect-multitypes-1.c, gcc.dg/vect/slp-3.c,
>>    gcc.dg/vect/no-vfa-pr29145.c, gcc.dg/vect/vect-multitypes-4.c:
>>    Likewise.
>>  * gcc.dg/vect/vect-peel-4.c: Make ia global.
>>
>
> Ok with the following change:
>
>>  static unsigned int
>>  arm_autovectorize_vector_sizes (void)
>>  {
>> -  return TARGET_NEON_VECTORIZE_QUAD ? 16 | 8 : 0;
>> +  return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : 16 | 8;
>>  }
>
>
> Please put parentheses round the expression to make the precedence explicit.

I added parentheses and committed the patch.

Thanks,
Ira

>
> R.
>
>


[patch] Allow not simple ivs in SLP

2011-09-14 Thread Ira Rosen
Hi,

This patch makes data-ref analysis not fail if simple_iv returns
false in basic block SLP.

Bootstrapped and tested on powerpc64-suse-linux.
OK for mainline?

Thanks,
Ira

ChangeLog:

 * tree-data-ref.c (dr_analyze_innermost): Rename to...
 (dr_analyze_innermost_1): ... this.  Add new argument.
 Allow not simple iv if analyzing basic block.
 (dr_analyze_innermost): Call dr_analyze_innermost_1.
 (create_data_ref): Call dr_analyze_innermost_1.

testsuite/ChangeLog:

 * gcc.dg/vect/bb-slp-24.c: New test.

Index: tree-data-ref.c
===
--- tree-data-ref.c (revision 178755)
+++ tree-data-ref.c (working copy)
@@ -722,11 +722,11 @@ canonicalize_base_object_address (tree addr)
 }

 /* Analyzes the behavior of the memory reference DR in the innermost loop or
-   basic block that contains it. Returns true if analysis succeed or false
+   basic block that contains it.  Returns true if analysis succeed or false
otherwise.  */

-bool
-dr_analyze_innermost (struct data_reference *dr)
+static bool
+dr_analyze_innermost_1 (struct data_reference *dr, struct loop *nest)
 {
   gimple stmt = DR_STMT (dr);
   struct loop *loop = loop_containing_stmt (stmt);
@@ -769,14 +769,25 @@ canonicalize_base_object_address (tree addr)
 }
   else
 base = build_fold_addr_expr (base);
+
   if (in_loop)
 {
   if (!simple_iv (loop, loop_containing_stmt (stmt), base, &base_iv,
   false))
 {
-  if (dump_file && (dump_flags & TDF_DETAILS))
-   fprintf (dump_file, "failed: evolution of base is not affine.\n");
-  return false;
+  if (nest)
+{
+  if (dump_file && (dump_flags & TDF_DETAILS))
+   fprintf (dump_file, "failed: evolution of base is not"
+" affine.\n");
+return false;
+}
+  else
+{
+  base_iv.base = base;
+  base_iv.step = ssize_int (0);
+  base_iv.no_overflow = true;
+}
 }
 }
   else
@@ -801,10 +812,18 @@ canonicalize_base_object_address (tree addr)
   else if (!simple_iv (loop, loop_containing_stmt (stmt),
poffset, &offset_iv, false))
 {
-  if (dump_file && (dump_flags & TDF_DETAILS))
-fprintf (dump_file, "failed: evolution of offset is not"
-" affine.\n");
-  return false;
+  if (nest)
+{
+  if (dump_file && (dump_flags & TDF_DETAILS))
+fprintf (dump_file, "failed: evolution of offset is not"
+" affine.\n");
+  return false;
+}
+  else
+{
+  offset_iv.base = poffset;
+  offset_iv.step = ssize_int (0);
+}
 }
 }

@@ -832,6 +851,19 @@ canonicalize_base_object_address (tree addr)
   return true;
 }

+/* Analyzes the behavior of the memory reference DR in the innermost loop or
+   basic block that contains it.  Returns true if analysis succeed or false
+   otherwise.  */
+
+bool
+dr_analyze_innermost (struct data_reference *dr)
+{
+  gimple stmt = DR_STMT (dr);
+  struct loop *loop = loop_containing_stmt (stmt);
+
+  return dr_analyze_innermost_1 (dr, loop);
+}
+
 /* Determines the base object and the list of indices of memory reference
DR, analyzed in LOOP and instantiated in loop nest NEST.  */

@@ -972,7 +1004,7 @@ create_data_ref (loop_p nest, loop_p loop, tree me
   DR_REF (dr) = memref;
   DR_IS_READ (dr) = is_read;

-  dr_analyze_innermost (dr);
+  dr_analyze_innermost_1 (dr, nest);
   dr_analyze_indices (dr, nest, loop);
   dr_analyze_alias (dr);

Index: testsuite/gcc.dg/vect/bb-slp-24.c
===
--- testsuite/gcc.dg/vect/bb-slp-24.c   (revision 0)
+++ testsuite/gcc.dg/vect/bb-slp-24.c   (revision 0)
@@ -0,0 +1,55 @@
+/* { dg-require-effective-target vect_int } */
+
+#include 
+#include "tree-vect.h"
+
+#define A 3
+#define N 256
+
+short src[N], dst[N];
+
+void foo (short * __restrict dst, short * __restrict src, int h, int stride)
+{
+int i;
+h /= 8;
+for (i = 0; i < h; i++) {
+dst[0] += A*src[0];
+dst[1] += A*src[1];
+dst[2] += A*src[2];
+dst[3] += A*src[3];
+dst[4] += A*src[4];
+dst[5] += A*src[5];
+dst[6] += A*src[6];
+dst[7] += A*src[7];
+dst += stride;
+src += stride;
+}
+}
+
+
+int main (void)
+{
+  int i;
+
+  check_vect ();
+
+  for (i = 0; i < N; i++)
+{
+  dst[i] = 0;
+  src[i] = i;
+}
+
+  foo (dst, src, N, 8);
+
+  for (i = 0; i < N; i++)
+{
+  if (dst[i] != A * i)
+abort ();
+}
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "basic block vectorized using
SLP" 1 "slp" { target vect_element_a

[patch] Allow read-after-read dependence in basic block SLP

2011-09-15 Thread Ira Rosen
Bootstrapped and tested on powerpc64-suse-linux.
Committed to trunk.

Ira

ChangeLog:

   * tree-vect-data-refs.c (vect_analyze_data_ref_dependence): Allow
   read-after-read dependencies in basic block SLP.

testsuite/ChangeLog:

   * gcc.dg/vect/bb-slp-25.c: New.

Index: tree-vect-data-refs.c
===
--- tree-vect-data-refs.c   (revision 178879)
+++ tree-vect-data-refs.c   (working copy)
@@ -607,6 +607,11 @@ vect_analyze_data_ref_dependence (struct data_depe
   if (vect_check_interleaving (dra, drb))
  return false;

+  /* Read-read is OK (we need this check here, after checking for
+ interleaving).  */
+  if (DR_IS_READ (dra) && DR_IS_READ (drb))
+return false;
+
   if (vect_print_dump_info (REPORT_DR_DETAILS))
 {
   fprintf (vect_dump, "can't determine dependence between ");
Index: testsuite/gcc.dg/vect/bb-slp-25.c
===
--- testsuite/gcc.dg/vect/bb-slp-25.c   (revision 0)
+++ testsuite/gcc.dg/vect/bb-slp-25.c   (revision 0)
@@ -0,0 +1,57 @@
+/* { dg-require-effective-target vect_int } */
+
+#include 
+#include "tree-vect.h"
+
+#define A 3
+#define B 4
+#define N 256
+
+short src[N], dst[N];
+
+void foo (short * __restrict dst, short * __restrict src, int h, int stride)
+{
+  int i;
+  h /= 16;
+  for (i = 0; i < h; i++)
+{
+  dst[0] += A*src[0] + src[stride];
+  dst[1] += A*src[1] + src[1+stride];
+  dst[2] += A*src[2] + src[2+stride];
+  dst[3] += A*src[3] + src[3+stride];
+  dst[4] += A*src[4] + src[4+stride];
+  dst[5] += A*src[5] + src[5+stride];
+  dst[6] += A*src[6] + src[6+stride];
+  dst[7] += A*src[7] + src[7+stride];
+  dst += 8;
+  src += 8;
+   }
+}
+
+
+int main (void)
+{
+  int i;
+
+  check_vect ();
+
+  for (i = 0; i < N; i++)
+{
+   dst[i] = 0;
+   src[i] = i;
+}
+
+  foo (dst, src, N, 8);
+
+  for (i = 0; i < N/2; i++)
+{
+  if (dst[i] != A * i + i + 8)
+abort ();
+}
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "basic block vectorized using
SLP" 1 "slp" { target vect_element_align } } } */
+/* { dg-final { cleanup-tree-dump "slp" } } */
+


[patch] Fix PR tree-optimization/50412

2011-09-18 Thread Ira Rosen
Hi,

Strided accesses of a single element or with gaps may require the creation
of an epilogue loop. At the moment we don't support peeling for outer
loops; therefore, we should not allow such strided accesses in outer
loops.

Bootstrapped and tested on powerpc64-suse-linux.
Committed to trunk.

Now testing for 4.6.
OK for 4.6 when the testing completes?

Thanks,
Ira

ChangeLog:

PR tree-optimization/50412
* tree-vect-data-refs.c (vect_analyze_group_access): Fail for
accesses that require an epilogue loop if vectorizing outer loop.

testsuite/ChangeLog:

PR tree-optimization/50412
* gfortran.dg/vect/pr50412.f90: New.



Index: tree-vect-data-refs.c
===
--- tree-vect-data-refs.c   (revision 178939)
+++ tree-vect-data-refs.c   (working copy)
@@ -2060,7 +2060,11 @@ vect_analyze_group_access (struct data_reference *
   HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
   HOST_WIDE_INT stride, last_accessed_element = 1;
   bool slp_impossible = false;
+  struct loop *loop = NULL;

+  if (loop_vinfo)
+loop = LOOP_VINFO_LOOP (loop_vinfo);
+
   /* For interleaving, STRIDE is STEP counted in elements, i.e., the
size of the
  interleaving group (including gaps).  */
   stride = dr_step / type_size;
@@ -2090,11 +2094,18 @@ vect_analyze_group_access (struct data_reference *

  if (loop_vinfo)
{
- LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
-
  if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "Data access with gaps requires scalar "
"epilogue loop");
+  if (loop->inner)
+{
+  if (vect_print_dump_info (REPORT_DETAILS))
+fprintf (vect_dump, "Peeling for outer loop is not"
+" supported");
+  return false;
+}
+
+  LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
}

  return true;
@@ -2277,10 +2288,17 @@ vect_analyze_group_access (struct data_reference *
   /* There is a gap in the end of the group.  */
   if (stride - last_accessed_element > 0 && loop_vinfo)
{
- LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
  if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "Data access with gaps requires scalar "
"epilogue loop");
+  if (loop->inner)
+{
+  if (vect_print_dump_info (REPORT_DETAILS))
+fprintf (vect_dump, "Peeling for outer loop is not supported");
+  return false;
+}
+
+  LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
}
 }
Index: testsuite/gfortran.dg/vect/pr50412.f90
===
--- testsuite/gfortran.dg/vect/pr50412.f90  (revision 0)
+++ testsuite/gfortran.dg/vect/pr50412.f90  (revision 0)
@@ -0,0 +1,12 @@
+! { dg-do compile }
+
+  DOUBLE PRECISION AK,AI,AAE
+  COMMON/com/AK(36),AI(4,4),AAE(8,4),ii,jj
+  DO 20 II=1,4
+DO 21 JJ=1,4
+  AK(n)=AK(n)-AAE(I,II)*AI(II,JJ)
+   21   CONTINUE
+   20 CONTINUE
+  END
+
+! { dg-final { cleanup-tree-dump "vect" } }


[patch] Fix tree-optimization/50414

2011-09-18 Thread Ira Rosen
Hi,

This patch adds a missing handling of MAX/MIN_EXPR in SLP reduction.

Boostrapped and tested on powerpc64-suse-linux.
Committed to trunk.

Ira

ChangeLog:

PR tree-optimization/50414
* tree-vect-slp.c (vect_get_constant_vectors): Handle MAX_EXPR and
MIN_EXPR.

testsuite/ChangeLog:

PR tree-optimization/50414
* gfortran.dg/vect/Ofast-pr50414.f90: New.
* gfortran.dg/vect/vect.exp: Run Ofast-* tests with -Ofast.
* gcc.dg/vect/no-scevccp-noreassoc-slp-reduc-7.c: New.


Index: tree-vect-slp.c
===
--- tree-vect-slp.c (revision 178939)
+++ tree-vect-slp.c (working copy)
@@ -1902,6 +1902,8 @@ vect_get_constant_vectors (tree op, slp_tree slp_n
   bool constant_p, is_store;
   tree neutral_op = NULL;
   enum tree_code code = gimple_assign_rhs_code (stmt);
+  gimple def_stmt;
+  struct loop *loop;

   if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def)
 {
@@ -1943,8 +1945,16 @@ vect_get_constant_vectors (tree op, slp_tree slp_n
 neutral_op = build_int_cst (TREE_TYPE (op), -1);
 break;

+  case MAX_EXPR:
+  case MIN_EXPR:
+def_stmt = SSA_NAME_DEF_STMT (op);
+loop = (gimple_bb (stmt))->loop_father;
+neutral_op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
+loop_preheader_edge (loop));
+break;
+
   default:
- neutral_op = NULL;
+neutral_op = NULL;
 }
 }

@@ -1997,8 +2007,8 @@ vect_get_constant_vectors (tree op, slp_tree slp_n

   if (reduc_index != -1)
 {
-  struct loop *loop = (gimple_bb (stmt))->loop_father;
-  gimple def_stmt = SSA_NAME_DEF_STMT (op);
+  loop = (gimple_bb (stmt))->loop_father;
+  def_stmt = SSA_NAME_DEF_STMT (op);

   gcc_assert (loop);
Index: testsuite/gfortran.dg/vect/Ofast-pr50414.f90
===
--- testsuite/gfortran.dg/vect/Ofast-pr50414.f90(revision 0)
+++ testsuite/gfortran.dg/vect/Ofast-pr50414.f90(revision 0)
@@ -0,0 +1,11 @@
+! { dg-do compile }
+
+  SUBROUTINE  SUB  (A,L,YMAX)
+  DIMENSION A(L)
+  YMA=A(1)
+  DO 2 I=1,L,2
+2 YMA=MAX(YMA,A(I),A(I+1))
+  CALL PROUND(YMA)
+  END
+
+! { dg-final { cleanup-tree-dump "vect" } }
Index: testsuite/gfortran.dg/vect/vect.exp
===
--- testsuite/gfortran.dg/vect/vect.exp (revision 178939)
+++ testsuite/gfortran.dg/vect/vect.exp (working copy)
@@ -84,6 +84,12 @@ lappend DEFAULT_VECTCFLAGS "-O3"
 dg-runtest [lsort [glob -nocomplain
$srcdir/$subdir/O3-*.\[fF\]{,90,95,03,08} ]]  \
 "" $DEFAULT_VECTCFLAGS

+# With -Ofast
+set DEFAULT_VECTCFLAGS $SAVED_DEFAULT_VECTCFLAGS
+lappend DEFAULT_VECTCFLAGS "-Ofast"
+dg-runtest [lsort [glob -nocomplain
$srcdir/$subdir/Ofast-*.\[fF\]{,90,95,03,08} ]]  \
+"" $DEFAULT_VECTCFLAGS
+
 # Clean up.
 set dg-do-what-default ${save-dg-do-what-default}

Index: testsuite/gcc.dg/vect/no-scevccp-noreassoc-slp-reduc-7.c
===
--- testsuite/gcc.dg/vect/no-scevccp-noreassoc-slp-reduc-7.c(revision 0)
+++ testsuite/gcc.dg/vect/no-scevccp-noreassoc-slp-reduc-7.c(revision 0)
@@ -0,0 +1,42 @@
+/* { dg-require-effective-target vect_int } */
+
+#include 
+#include "tree-vect.h"
+
+#define N 16
+#define MAX 121
+
+unsigned int ub[N] = {0,3,6,9,12,15,18,121,24,27,113,33,36,39,42,45};
+
+/* Vectorization of reduction using loop-aware SLP (with unrolling).  */
+
+__attribute__ ((noinline))
+int main1 (int n)
+{
+  int i;
+  unsigned int max = 50;
+
+  for (i = 0; i < n; i++) {
+max = max < ub[2*i] ? ub[2*i] : max;
+max = max < ub[2*i + 1] ? ub[2*i + 1] : max;
+  }
+
+  /* Check results:  */
+  if (max != MAX)
+abort ();
+
+  return 0;
+}
+
+int main (void)
+{
+  check_vect ();
+
+  main1 (N/2);
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" {
xfail vect_no_int_max } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1
"vect" { xfail vect_no_int_max } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+


[patch] Fix PR testsuite/50435

2011-09-18 Thread Ira Rosen
Hi,

This patch adds an if-statement to avoid loop vectorization and fixes
underscores around restrict in gcc.dg/vect/bb-slp-25.c.

Tested by Dominique on x86_64-apple-darwin10 and on x86_64-suse-linux.

Committed to trunk.

Ira


2011-09-18  Dominique d'Humieres  
      Ira Rosen  

PR testsuite/50435
* gcc.dg/vect/bb-slp-25.c: Add an if to avoid loop vectorization.
Fix underscores around restrict.

Index: testsuite/gcc.dg/vect/bb-slp-25.c
===
--- testsuite/gcc.dg/vect/bb-slp-25.c   (revision 178940)
+++ testsuite/gcc.dg/vect/bb-slp-25.c   (working copy)
@@ -9,7 +9,7 @@

 short src[N], dst[N];

-void foo (short * __restrict dst, short * __restrict src, int h, int stride)
+void foo (short * __restrict__ dst, short * __restrict__ src, int h,
int stride, int dummy)
 {
   int i;
   h /= 16;
@@ -25,6 +25,8 @@ void foo (short * __restrict dst, short
   dst[7] += A*src[7] + src[7+stride];
   dst += 8;
   src += 8;
+  if (dummy == 32)
+abort ();
}
 }

@@ -41,7 +43,7 @@ int main (void)
src[i] = i;
 }

-  foo (dst, src, N, 8);
+  foo (dst, src, N, 8, 0);

   for (i = 0; i < N/2; i++)
 {


[patch] Fix PR tree-optimization/50413

2011-09-19 Thread Ira Rosen
Hi,

When we can't vectorize a certain statement in SLP we mark it as not
vectorizable and continue with the analysis. This is wrong when the
reason for the failure is that we can't analyze a data-ref, because
this way we may miss a data dependence. This patch fails SLP if the
data-refs analysis fails.

Bootstrapped and tested on powerpc64-suse-linux and i486-linux-gnu.
Committed to trunk.

The same patch bootstrapped and tested on powerpc64-suse-linux for 4.6.
O.K. for 4.6?

Thanks,
Ira

ChangeLog:

PR tree-optimization/50413
* tree-vect-data-refs.c (vect_analyze_data_refs): Fail to vectorize
a basic block if one of its data-refs can't be analyzed.

testsuite/ChangeLog:

PR tree-optimization/50413
* g++.dg/vect/vect.exp: Run slp-pr* tests with
-fdump-tree-slp-details.  Run other tests with
-fdump-tree-vect-details.
* g++.dg/vect/slp-pr50413.cc: New.
Index: ChangeLog
===
--- ChangeLog   (revision 178967)
+++ ChangeLog   (working copy)
@@ -1,3 +1,9 @@
+2011-09-19  Ira Rosen  
+
+   PR tree-optimization/50413
+   * tree-vect-data-refs.c (vect_analyze_data_refs): Fail to vectorize
+   a basic block if one of its data-refs can't be analyzed.
+
 2011-09-19  Paul Brook  
 
gcc/
Index: testsuite/ChangeLog
===
--- testsuite/ChangeLog (revision 178967)
+++ testsuite/ChangeLog (working copy)
@@ -1,3 +1,11 @@
+2011-09-19  Ira Rosen  
+
+   PR tree-optimization/50413
+   * g++.dg/vect/vect.exp: Run slp-pr* tests with
+   -fdump-tree-slp-details.  Run other tests with
+   -fdump-tree-vect-details.
+   * g++.dg/vect/slp-pr50413.cc: New.
+
 2011-09-18  Dominique d'Humieres  
Ira Rosen  
 
@@ -4,7 +12,7 @@
PR testsuite/50435
* gcc.dg/vect/bb-slp-25.c: Add an if to avoid loop vectorization.
Fix underscores around restrict.
- 
+
 2011-09-18  Ira Rosen  
 
PR tree-optimization/50414
Index: testsuite/g++.dg/vect/vect.exp
===
--- testsuite/g++.dg/vect/vect.exp  (revision 178967)
+++ testsuite/g++.dg/vect/vect.exp  (working copy)
@@ -40,9 +40,14 @@ global DEFAULT_VECTCFLAGS
 set DEFAULT_VECTCFLAGS ""
 
 # These flags are used for all targets.
-lappend DEFAULT_VECTCFLAGS "-O2" "-ftree-vectorize" "-fno-vect-cost-model" \
-  "-ftree-vectorizer-verbose=4" "-fdump-tree-vect-stats"
+lappend DEFAULT_VECTCFLAGS "-O2" "-ftree-vectorize" "-fno-vect-cost-model"
 
+set VECT_SLP_CFLAGS $DEFAULT_VECTCFLAGS
+
+lappend DEFAULT_VECTCFLAGS "-fdump-tree-vect-details"
+lappend VECT_SLP_CFLAGS "-fdump-tree-slp-details"
+
+
 # Skip these tests for targets that do not support generating vector
 # code.  Set additional target-dependent vector flags, which can be
 # overridden by using dg-options in individual tests.
@@ -54,8 +59,10 @@ if ![check_vect_support_and_set_flags] {
 dg-init
 
 # Main loop.
-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.{c,cc,S} ]] \
+dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/pr*.{c,cc,S} ]] \
 "" $DEFAULT_VECTCFLAGS
+dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/slp-pr*.{c,cc,S} ]] \
+"" $VECT_SLP_CFLAGS
 
  Tests with special options
 global SAVED_DEFAULT_VECTCFLAGS
Index: testsuite/g++.dg/vect/slp-pr50413.cc
===
--- testsuite/g++.dg/vect/slp-pr50413.cc(revision 0)
+++ testsuite/g++.dg/vect/slp-pr50413.cc(revision 0)
@@ -0,0 +1,185 @@
+/* { dg-require-effective-target vect_int } */
+
+typedef unsigned long long UInt64;
+
+typedef struct struct128
+{
+  UInt64 uint64_lower;
+  UInt64 uint64_upper;
+}
+STRUCT_128;
+
+typedef union uint128_bitmap
+{
+  STRUCT_128 uint128;
+  
+struct
+{
+  UInt64 b00 : 1;
+  UInt64 b01 : 1;
+  UInt64 b02 : 1;
+  UInt64 b03 : 1;
+  UInt64 b04 : 1;
+  UInt64 b05 : 1;
+  UInt64 b06 : 1;
+  UInt64 b07 : 1;
+  UInt64 b08 : 1;
+  UInt64 b09 : 1;
+  UInt64 b10 : 1;
+  UInt64 b11 : 1;
+  UInt64 b12 : 1;
+  UInt64 b13 : 1;
+  UInt64 b14 : 1;
+  UInt64 b15 : 1;
+  UInt64 b16 : 1;
+  UInt64 b17 : 1;
+  UInt64 b18 : 1;
+  UInt64 b19 : 1;
+  UInt64 b20 : 1;
+  UInt64 b21 : 1;
+  UInt64 b22 : 1;
+  UInt64 b23 : 1;
+  UInt64 b24 : 1;
+  UInt64 b25 : 1;
+  UInt64 b26 : 1;
+  UInt64 b27 : 1;
+  UInt64 b28 : 1;
+  UInt64 b29 : 1;
+  UInt64 b30 : 1;
+  UInt64 b31 : 1;
+  UInt64 b32 : 1;
+  UInt64 b33 : 1;
+  UInt64 b34 : 1;
+  UInt64 b35 : 1;
+  UInt64 b36 : 1;
+  UInt64 b37 : 1;
+  UInt64 b38 : 1;
+  UInt64 b39 : 1;
+   

[patch] Fix g++.dg/vect/slp-pr50413.cc

2011-09-19 Thread Ira Rosen
Hi,

g++.dg/vect/slp-pr50413.cc should check that the basic block in shift
() doesn't get vectorized. This patch removes other functions that
contain basic blocks that can be vectorizable on some platforms.

Tested on x86_64-suse-linux.
Committed to trunk.

Ira

testsuite/ChangeLog:

* g++.dg/vect/slp-pr50413.cc: Don't run the test.  Remove main ()
and get_bit ().


Index: testsuite/g++.dg/vect/slp-pr50413.cc
===
--- testsuite/g++.dg/vect/slp-pr50413.cc(revision 178998)
+++ testsuite/g++.dg/vect/slp-pr50413.cc(working copy)
@@ -1,3 +1,4 @@
+/* { dg-do compile } */
 /* { dg-require-effective-target vect_int } */

 typedef unsigned long long UInt64;
@@ -150,12 +151,6 @@ UInt128_BITMAP;

 UInt128_BITMAP V;

-template
-unsigned char get_bit(CAST value, unsigned char pos)
-{
-return ( value & (static_cast(1) << pos) ) != 0;
-}
-
 void shift(unsigned char t)
 {
   V.uint128.uint64_lower = (V.uint128.uint64_lower >> 1);
@@ -165,21 +160,6 @@ void shift(unsigned char t)
   V.bitmap.b96 = t;
 }

-int main()
-{
-   V.uint128.uint64_lower = 0;
-   V.uint128.uint64_upper = 0xd4004001;
-
-   UInt64 Kc = 0xDD1A1B8A8A5C2400;
-
-  for (int i = 0; i < 64; i++ )
-  {
-shift( get_bit( Kc, i) );
-  }
-
-   return 0;
-}
-
 /* { dg-final { scan-tree-dump-times "basic block vectorized using
SLP" 0 "slp" } } */
 /* { dg-final { cleanup-tree-dump "slp" } } */


[patch] Fix PR tree-optimization/50451

2011-09-22 Thread Ira Rosen
Hi,

This patch adds a missing support of constant operands in reduction in SLP.

Bootstrapped and tested on powerpc64-suse-linux.
Committed.

Ira

ChangeLog:

PR tree-optimization/50451
* tree-vect-slp.c (vect_get_constant_vectors): Don't fail for
constant operands in reduction.
(vect_get_slp_defs): Don't create vector operand for NULL scalar
operand.

testsuite/ChangeLog:

PR tree-optimization/50451
* gcc.dg/vect/pr50451.c: New test.

Index: tree-vect-slp.c
===
--- tree-vect-slp.c (revision 179076)
+++ tree-vect-slp.c (working copy)
@@ -1905,14 +1905,9 @@ vect_get_constant_vectors (tree op, slp_tree slp_n
   gimple def_stmt;
   struct loop *loop;

-  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def)
+  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
+  && reduc_index != -1)
 {
-  if (reduc_index == -1)
-{
-  VEC_free (tree, heap, *vec_oprnds);
-  return;
-}
-
   op_num = reduc_index - 1;
   op = gimple_op (stmt, reduc_index);
   /* For additional copies (see the explanation of NUMBER_OF_COPIES below)
@@ -2164,7 +2159,7 @@ vect_get_slp_defs (tree op0, tree op1, slp_tree sl
 return;

   code = gimple_assign_rhs_code (first_stmt);
-  if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS || !vec_oprnds1)
+  if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS || !vec_oprnds1 || !op1)
 return;

   /* The number of vector defs is determined by the number of vector statements
Index: testsuite/gcc.dg/vect/pr50451.c
===
--- testsuite/gcc.dg/vect/pr50451.c (revision 0)
+++ testsuite/gcc.dg/vect/pr50451.c (revision 0)
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_int } */
+
+int
+foo (int integral, int decimal, int power_ten)
+{
+  while (power_ten > 0)
+{
+  integral *= 10;
+  decimal *= 10;
+  power_ten--;
+}
+
+  return integral+decimal;
+}
+
+/* { dg-final { cleanup-tree-dump "vect" } } */
+


Re: [patch] Allow not simple ivs in SLP

2011-09-25 Thread Ira Rosen
On 23 September 2011 23:09, Richard Guenther  wrote:
> On Wed, Sep 14, 2011 at 2:01 PM, Ira Rosen  wrote:
>> Hi,
>>
>> This patch makes data-refs analysis to not fail if simple_iv returns
>> false in basic block SLP.
>>
>> Bootstrapped and tested on powerpc64-suse-linux.
>> OK for mainline?
>
> Ok if you instead of wrapping dr_analyze_innermost change the remaining
> callers to pass the loop nest parameter instead.

I committed the attached patch.

Thanks,
Ira

>
> Thanks,
> Richard.
>
Index: tree-loop-distribution.c
===
--- tree-loop-distribution.c(revision 179159)
+++ tree-loop-distribution.c(working copy)
@@ -268,7 +268,7 @@ generate_memset_zero (gimple stmt, tree op0, tree
 
   DR_STMT (dr) = stmt;
   DR_REF (dr) = op0;
-  res = dr_analyze_innermost (dr);
+  res = dr_analyze_innermost (dr, loop_containing_stmt (stmt));
   gcc_assert (res && stride_of_unit_type_p (DR_STEP (dr), TREE_TYPE (op0)));
 
   nb_bytes = build_size_arg_loc (loc, nb_iter, op0, &stmt_list);
Index: ChangeLog
===
--- ChangeLog   (revision 179159)
+++ ChangeLog   (working copy)
@@ -1,3 +1,13 @@
+2011-09-25  Ira Rosen  
+
+   * tree-data-ref.c (dr_analyze_innermost): Add new argument.
+   Allow not simple iv if analyzing basic block.
+   (create_data_ref): Update call to dr_analyze_innermost.
+   (stmt_with_adjacent_zero_store_dr_p, ref_base_address): Likewise.
+   * tree-loop-distribution.c (generate_memset_zero): Likewise.
+   * tree-predcom.c (find_looparound_phi): Likewise.
+   * tree-data-ref.h (dr_analyze_innermost): Add new argument.
+
 2011-09-24  David S. Miller  
 
* config/sparc/sparc.h (FIRST_PSEUDO_REGISTER): Bump to 103.
Index: testsuite/gcc.dg/vect/bb-slp-24.c
===
--- testsuite/gcc.dg/vect/bb-slp-24.c   (revision 0)
+++ testsuite/gcc.dg/vect/bb-slp-24.c   (revision 0)
@@ -0,0 +1,59 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define A 3
+#define N 256
+
+short src[N], dst[N];
+
+void foo (short * __restrict__ dst, short * __restrict__ src, int h,
+  int stride, int dummy)
+{
+  int i;
+  h /= 8;
+  for (i = 0; i < h; i++)
+{
+  dst[0] += A*src[0];
+  dst[1] += A*src[1];
+  dst[2] += A*src[2];
+  dst[3] += A*src[3];
+  dst[4] += A*src[4];
+  dst[5] += A*src[5];
+  dst[6] += A*src[6];
+  dst[7] += A*src[7];
+  dst += stride;
+  src += stride;
+  if (dummy == 32)
+abort ();
+}
+}
+
+
+int main (void)
+{
+  int i;
+
+  check_vect ();
+
+  for (i = 0; i < N; i++)
+{
+  dst[i] = 0;
+  src[i] = i;
+}
+
+  foo (dst, src, N, 8, 0);
+
+  for (i = 0; i < N; i++)
+{
+  if (dst[i] != A * i)
+abort ();
+}
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 
"slp" { target vect_element_align } } } */
+/* { dg-final { cleanup-tree-dump "slp" } } */
+
Index: testsuite/ChangeLog
===========
--- testsuite/ChangeLog (revision 179159)
+++ testsuite/ChangeLog (working copy)
@@ -1,3 +1,7 @@
+2011-09-25  Ira Rosen  
+
+   * gcc.dg/vect/bb-slp-24.c: New.
+
 2011-09-24  Jason Merrill  
 
* g++.dg/cpp0x/nsdmi-eh1.C: New.
Index: tree-data-ref.c
===
--- tree-data-ref.c (revision 179159)
+++ tree-data-ref.c (working copy)
@@ -722,11 +722,11 @@ canonicalize_base_object_address (tree addr)
 }
 
 /* Analyzes the behavior of the memory reference DR in the innermost loop or
-   basic block that contains it. Returns true if analysis succeed or false
+   basic block that contains it.  Returns true if analysis succeed or false
otherwise.  */
 
 bool
-dr_analyze_innermost (struct data_reference *dr)
+dr_analyze_innermost (struct data_reference *dr, struct loop *nest)
 {
   gimple stmt = DR_STMT (dr);
   struct loop *loop = loop_containing_stmt (stmt);
@@ -769,14 +769,25 @@ bool
 }
   else
 base = build_fold_addr_expr (base);
+
   if (in_loop)
 {
   if (!simple_iv (loop, loop_containing_stmt (stmt), base, &base_iv,
   false))
 {
-  if (dump_file && (dump_flags & TDF_DETAILS))
-   fprintf (dump_file, "failed: evolution of base is not affine.\n");
-  return false;
+  if (nest)
+{
+  if (dump_file && (dump_flags & TDF_DETAILS))
+fprintf (dump_file, "failed: evolution of base is not"
+" affine.\n");
+  return false;
+}
+  else
+

[patch] Support a choice of vector size in SLP

2011-09-25 Thread Ira Rosen
Hi,

This patch supports an automatic choice of vector size in basic block
vectorization similar to the loop vectorization case.

I am not sure about the new keyword.

Bootstrapped on powerpc64-suse-linux, tested on powerpc64-suse-linux
and arm-linux-gnueabi.

Thanks,
Ira

ChangeLog:

* tree-vect-slp.c (vect_slp_analyze_bb_1): Split out core part
of vect_analyze_bb here.
(vect_analyze_bb): Loop over vector sizes calling vect_analyze_bb_1.

testsuite/ChangeLog:

* lib/target-supports.exp (check_effective_target_vect_half_size): New.
* gcc.dg/vect/bb-slp-11.c: Expect the error message twice in case
of multiple vector sizes.
* gcc.dg/vect/bb-slp-26.c: New.

Index: testsuite/lib/target-supports.exp
===
--- testsuite/lib/target-supports.exp   (revision 179159)
+++ testsuite/lib/target-supports.exp   (working copy)
@@ -3393,6 +3393,24 @@ proc check_effective_target_vect_multiple_sizes {
 return $et_vect_multiple_sizes_saved
 }

+# Return 1 if the target supports vectors of 8 chars, 4 shorts and 2 ints.
+
+proc check_effective_target_vect_half_size { } {
+global et_vect_half_size
+
+if [info exists et_vect_half_size_saved] {
+verbose "check_effective_target_vect_half_size: using cached result" 2
+} else {
+set et_vect_half_size_saved 0
+if { ([istarget arm*-*-*] && [check_effective_target_arm_neon_ok]) } {
+   set et_vect_half_size_saved 1
+}
+}
+
+verbose "check_effective_target_vect_half_size: returning
$et_vect_half_size_saved" 2
+return $et_vect_half_size_saved
+}
+
 # Return 1 if the target supports section-anchors

 proc check_effective_target_section_anchors { } {
Index: testsuite/gcc.dg/vect/bb-slp-26.c
===
--- testsuite/gcc.dg/vect/bb-slp-26.c   (revision 0)
+++ testsuite/gcc.dg/vect/bb-slp-26.c   (revision 0)
@@ -0,0 +1,59 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define A 3
+#define B 4
+#define N 256
+
+char src[N], dst[N];
+
+void foo (char * __restrict__ dst, char * __restrict__ src, int h,
int stride, int dummy)
+{
+  int i;
+  h /= 16;
+  for (i = 0; i < h; i++)
+{
+  dst[0] += A*src[0] + src[stride];
+  dst[1] += A*src[1] + src[1+stride];
+  dst[2] += A*src[2] + src[2+stride];
+  dst[3] += A*src[3] + src[3+stride];
+  dst[4] += A*src[4] + src[4+stride];
+  dst[5] += A*src[5] + src[5+stride];
+  dst[6] += A*src[6] + src[6+stride];
+  dst[7] += A*src[7] + src[7+stride];
+  dst += 8;
+  src += 8;
+  if (dummy == 32)
+abort ();
+   }
+}
+
+
+int main (void)
+{
+  int i;
+
+  check_vect ();
+
+  for (i = 0; i < N; i++)
+{
+   dst[i] = 0;
+   src[i] = i/8;
+}
+
+  foo (dst, src, N, 8, 0);
+
+  for (i = 0; i < N/2; i++)
+{
+  if (dst[i] != A * src[i] + src[i+8])
+abort ();
+}
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "basic block vectorized using
SLP" 1 "slp" { target vect_half_size } } } */
+/* { dg-final { cleanup-tree-dump "slp" } } */
+
Index: testsuite/gcc.dg/vect/bb-slp-11.c
===
--- testsuite/gcc.dg/vect/bb-slp-11.c   (revision 179159)
+++ testsuite/gcc.dg/vect/bb-slp-11.c   (working copy)
@@ -49,6 +49,7 @@ int main (void)
 }

 /* { dg-final { scan-tree-dump-times "basic block vectorized using
SLP" 0 "slp" } } */
-/* { dg-final { scan-tree-dump-times "SLP with multiple types" 1 "slp" } } */
+/* { dg-final { scan-tree-dump-times "SLP with multiple types" 1
"slp" { xfail vect_multiple_sizes } } } */
+/* { dg-final { scan-tree-dump-times "SLP with multiple types" 2
"slp" { target vect_multiple_sizes } } } */
 /* { dg-final { cleanup-tree-dump "slp" } } */

Index: tree-vect-slp.c
===
--- tree-vect-slp.c (revision 179159)
+++ tree-vect-slp.c (working copy)
@@ -1694,42 +1694,18 @@ vect_bb_vectorization_profitable_p (bb_vec_info bb

 /* Check if the basic block can be vectorized.  */

-bb_vec_info
-vect_slp_analyze_bb (basic_block bb)
+static bb_vec_info
+vect_slp_analyze_bb_1 (basic_block bb)
 {
   bb_vec_info bb_vinfo;
   VEC (ddr_p, heap) *ddrs;
   VEC (slp_instance, heap) *slp_instances;
   slp_instance instance;
-  int i, insns = 0;
-  gimple_stmt_iterator gsi;
+  int i;
   int min_vf = 2;
   int max_vf = MAX_VECTORIZATION_FACTOR;
   bool data_dependence_in_bb = false;

-  current_vector_size = 0;
-
-  if (vect_print_dump_info (REPORT_DETAILS))
-fprintf (vect_dump, "===vect_slp_analyze_bb===\n");
-
-  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
-{
-  gimple stmt = gsi_stmt (gsi);
-  if (!is_gimple_debug (stmt)
- && !gimple_nop_p (stmt)
- && gimple_code (stmt) != GIMPLE_LABEL)
-   insns++;
-}
-
-  if (ins

Re: [patch] Support a choice of vector size in SLP

2011-09-25 Thread Ira Rosen
On 25 September 2011 14:45, Richard Guenther  wrote:
> On Sun, Sep 25, 2011 at 12:59 PM, Ira Rosen  wrote:
>> Hi,
>>
>> This patch supports an automatic choice of vector size in basic block
>> vectorization similar to the loop vectorization case.
>>
>> I am not sure about the new keyword.
>
> The testsuite one?  I guess we should name them vect128, vect256, etc.,
> as testcases will be looking for an absolute size, not a relative ("half") 
> one.

OK, changing it to:

Index: testsuite/lib/target-supports.exp
===
--- testsuite/lib/target-supports.exp   (revision 179159)
+++ testsuite/lib/target-supports.exp   (working copy)
@@ -3393,6 +3393,24 @@ proc check_effective_target_vect_multiple_sizes {
 return $et_vect_multiple_sizes_saved
 }

+# Return 1 if the target supports vectors of 64 bits.
+
+proc check_effective_target_vect64 { } {
+global et_vect64
+
+if [info exists et_vect64_saved] {
+verbose "check_effective_target_vect64: using cached result" 2
+} else {
+set et_vect64_saved 0
+if { ([istarget arm*-*-*] && [check_effective_target_arm_neon_ok]) } {
+   set et_vect64_saved 1
+}
+}
+
+verbose "check_effective_target_vect64: returning $et_vect64_saved" 2
+return $et_vect64_saved
+}
+
 # Return 1 if the target supports section-anchors

 proc check_effective_target_section_anchors { } {


Thanks,
Ira

>
> Richard.
>
>> Bootstrapped on powerpc64-suse-linux, tested on powerpc64-suse-linux
>> and arm-linux-gnueabi.
>>
>> Thanks,
>> Ira
>>
>> ChangeLog:
>>
>>        * tree-vect-slp.c (vect_slp_analyze_bb_1): Split out core part
>>        of vect_analyze_bb here.
>>        (vect_analyze_bb): Loop over vector sizes calling vect_analyze_bb_1.
>>
>> testsuite/ChangeLog:
>>
>>        * lib/target-supports.exp (check_effective_target_vect_half_size): 
>> New.
>>        * gcc.dg/vect/bb-slp-11.c: Expect the error message twice in case
>>        of multiple vector sizes.
>>        * gcc.dg/vect/bb-slp-26.c: New.
>>
>> Index: testsuite/lib/target-supports.exp
>> ===
>> --- testsuite/lib/target-supports.exp   (revision 179159)
>> +++ testsuite/lib/target-supports.exp   (working copy)
>> @@ -3393,6 +3393,24 @@ proc check_effective_target_vect_multiple_sizes {
>>     return $et_vect_multiple_sizes_saved
>>  }
>>
>> +# Return 1 if the target supports vectors of 8 chars, 4 shorts and 2 ints.
>> +
>> +proc check_effective_target_vect_half_size { } {
>> +    global et_vect_half_size
>> +
>> +    if [info exists et_vect_half_size_saved] {
>> +        verbose "check_effective_target_vect_half_size: using cached 
>> result" 2
>> +    } else {
>> +        set et_vect_half_size_saved 0
>> +        if { ([istarget arm*-*-*] && [check_effective_target_arm_neon_ok]) 
>> } {
>> +           set et_vect_half_size_saved 1
>> +        }
>> +    }
>> +
>> +    verbose "check_effective_target_vect_half_size: returning
>> $et_vect_half_size_saved" 2
>> +    return $et_vect_half_size_saved
>> +}
>> +
>>  # Return 1 if the target supports section-anchors
>>
>>  proc check_effective_target_section_anchors { } {
>> Index: testsuite/gcc.dg/vect/bb-slp-26.c
>> ===
>> --- testsuite/gcc.dg/vect/bb-slp-26.c   (revision 0)
>> +++ testsuite/gcc.dg/vect/bb-slp-26.c   (revision 0)
>> @@ -0,0 +1,59 @@
>> +/* { dg-require-effective-target vect_int } */
>> +
>> +#include 
>> +#include "tree-vect.h"
>> +
>> +#define A 3
>> +#define B 4
>> +#define N 256
>> +
>> +char src[N], dst[N];
>> +
>> +void foo (char * __restrict__ dst, char * __restrict__ src, int h,
>> int stride, int dummy)
>> +{
>> +  int i;
>> +  h /= 16;
>> +  for (i = 0; i < h; i++)
>> +    {
>> +      dst[0] += A*src[0] + src[stride];
>> +      dst[1] += A*src[1] + src[1+stride];
>> +      dst[2] += A*src[2] + src[2+stride];
>> +      dst[3] += A*src[3] + src[3+stride];
>> +      dst[4] += A*src[4] + src[4+stride];
>> +      dst[5] += A*src[5] + src[5+stride];
>> +      dst[6] += A*src[6] + src[6+stride];
>> +      dst[7] += A*src[7] + src[7+stride];
>> +      dst += 8;
>> +      src += 8;
>> +      if (dummy == 32)
>> +        abort ();
>> +   }
>> +}
>> +
>> +
>

Re: [patch] Support vectorization of widening shifts

2011-09-26 Thread Ira Rosen
On 26 September 2011 17:12, Richard Guenther  wrote:
> On Mon, Sep 19, 2011 at 9:54 AM, Ira Rosen  wrote:
>> Hi,
>>
>> This patch adds a support of widening shift left. The following
>> pattern is detected:
>>
>> type a_t;
>> TYPE a_T, res_T;
>>
>> a_t = ;
>> a_T = (TYPE) a_t;
>> res_T = a_T << CONST;
>>
>> ('TYPE' is at least 2 times bigger than 'type', and CONST is at most
>> the size of 'type')
>>
>> and create a pattern stmt using new tree code WIDEN_SHIFT_LEFT_EXPR for it:
>>
>> a_t = ;
>> a_T = (TYPE) a_t;
>> res_T = a_T << CONST;
>>    -->  res_T = a_t w<< CONST;
>>
>> which is later transformed into:
>>
>> va_t = ;
>> vres_T0 = WIDEN_SHIFT_LEFT_LO_EXPR ;
>> vres_T1 = WIDEN_SHIFT_LEFT_HI_EXPR ;
>>
>> This patch also supports unsigned types, and cases when 'TYPE' is 4
>> times bigger than 'type'.
>> This feature is similar to vectorization of widening multiplication.
>>
>> Bootstrapped on powerpc64-suse-linux, tested on powerpc64-suse-linux
>> and arm-linux-gnueabi
>> OK for mainline?
>
> Hmm, it doesn't look like arm has real widening shift instructions.

It does: vshll. The implementation may look awkward because we don't
support multiple vector sizes in the same operation (vshll takes a
64-bit vector and produces a 128-bit vector), but the resulting code
is just the instruction itself.

> So why not split this into the widening, shift parts in the vectorizer?

What do you mean? (We of course already support widening first and
then shifting the widened value).

Thanks,
Ira

> That
> way you wouldn't need new tree codes and all architectures that can
> do widening conversions would benefit?
>
> Thanks,
> Richard.
>


[patch] Support multiple types in SLP

2011-09-27 Thread Ira Rosen
Hi,

This patch adds support for multiple types (in the same SLP instance)
in basic block vectorization.

Bootstrapped and tested on powerpc64-suse-linux.
Applied to trunk.

Ira

ChangeLog:

* tree-vect-stmts.c (vectorizable_type_demotion): Handle basic block
vectorization.
(vectorizable_type_promotion): Likewise.
(vect_analyze_stmt): Call vectorizable_type_demotion and
vectorizable_type_promotion for basic blocks.
(supportable_widening_operation): Don't assume loop vectorization.
* tree-vect-slp.c (vect_build_slp_tree): Allow multiple types for
basic blocks.  Update vectorization factor for basic block
vectorization.
(vect_analyze_slp_instance): Allow multiple types for basic block
vectorization.  Recheck unrolling factor after construction of SLP
instance.

testsuite/ChangeLog:

* gcc.dg/vect/bb-slp-11.c: Expect to get vectorized with 64-bit
vectors.
* gcc.dg/vect/bb-slp-27.c: New.
* gcc.dg/vect/bb-slp-28.c: New.
Index: ChangeLog
===
--- ChangeLog   (revision 179266)
+++ ChangeLog   (working copy)
@@ -1,3 +1,18 @@
+2011-09-27  Ira Rosen  
+
+   * tree-vect-stmts.c (vectorizable_type_demotion): Handle basic block
+   vectorization.
+   (vectorizable_type_promotion): Likewise.
+   (vect_analyze_stmt): Call vectorizable_type_demotion and
+   vectorizable_type_promotion for basic blocks.
+   (supportable_widening_operation): Don't assume loop vectorization.
+   * tree-vect-slp.c (vect_build_slp_tree): Allow multiple types for
+   basic blocks.  Update vectorization factor for basic block
+   vectorization.
+   (vect_analyze_slp_instance): Allow multiple types for basic block
+   vectorization.  Recheck unrolling factor after construction of SLP
+   instance.
+
 2011-09-27  Richard Guenther  
 
* tree-object-size.c (compute_object_sizes): Fix dumping of
Index: testsuite/gcc.dg/vect/bb-slp-27.c
===
--- testsuite/gcc.dg/vect/bb-slp-27.c   (revision 0)
+++ testsuite/gcc.dg/vect/bb-slp-27.c   (revision 0)
@@ -0,0 +1,49 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define A 3
+#define N 16
+
+short src[N], dst[N];
+
+void foo (int a)
+{
+  dst[0] += a*src[0];
+  dst[1] += a*src[1];
+  dst[2] += a*src[2];
+  dst[3] += a*src[3];
+  dst[4] += a*src[4];
+  dst[5] += a*src[5];
+  dst[6] += a*src[6];
+  dst[7] += a*src[7];
+}
+
+
+int main (void)
+{
+  int i;
+
+  check_vect ();
+
+  for (i = 0; i < N; i++)
+{
+  dst[i] = 0;
+  src[i] = i;
+}
+
+  foo (A);
+
+  for (i = 0; i < 8; i++)
+{
+  if (dst[i] != A * i)
+abort ();
+}
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 
"slp" { target { vect_int_mult && vect_unpack && vect_pack_trunc } } } } */
+/* { dg-final { cleanup-tree-dump "slp" } } */
+
Index: testsuite/gcc.dg/vect/bb-slp-28.c
===
--- testsuite/gcc.dg/vect/bb-slp-28.c   (revision 0)
+++ testsuite/gcc.dg/vect/bb-slp-28.c   (revision 0)
@@ -0,0 +1,71 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define A 300
+#define N 16
+
+char src[N];
+short dst[N];
+short src1[N], dst1[N];
+
+void foo (int a)
+{
+  dst[0] = (short) (a * (int) src[0]);
+  dst[1] = (short) (a * (int) src[1]);
+  dst[2] = (short) (a * (int) src[2]);
+  dst[3] = (short) (a * (int) src[3]);
+  dst[4] = (short) (a * (int) src[4]);
+  dst[5] = (short) (a * (int) src[5]);
+  dst[6] = (short) (a * (int) src[6]);
+  dst[7] = (short) (a * (int) src[7]);
+  dst[8] = (short) (a * (int) src[8]);
+  dst[9] = (short) (a * (int) src[9]);
+  dst[10] = (short) (a * (int) src[10]);
+  dst[11] = (short) (a * (int) src[11]);
+  dst[12] = (short) (a * (int) src[12]);
+  dst[13] = (short) (a * (int) src[13]);
+  dst[14] = (short) (a * (int) src[14]);
+  dst[15] = (short) (a * (int) src[15]);
+
+  dst1[0] += src1[0];
+  dst1[1] += src1[1];
+  dst1[2] += src1[2];
+  dst1[3] += src1[3];
+  dst1[4] += src1[4];
+  dst1[5] += src1[5];
+  dst1[6] += src1[6];
+  dst1[7] += src1[7];
+}
+
+
+int main (void)
+{
+  int i;
+
+  check_vect ();
+
+  for (i = 0; i < N; i++)
+{
+  dst[i] = 2;
+  dst1[i] = 0;
+  src[i] = i;
+  src1[i] = i+2;
+}
+
+  foo (A);
+
+  for (i = 0; i < N; i++)
+{
+  if (dst[i] != A * i
+  || (i < N/2 && dst1[i] != i + 2))
+abort ();
+}
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 
"slp" { target { vect_int_mult && vect_

Re: [patch] Support vectorization of widening shifts

2011-10-02 Thread Ira Rosen
On 29 September 2011 17:30, Ramana Radhakrishnan
 wrote:
> On 19 September 2011 08:54, Ira Rosen  wrote:
>
>>
>> Bootstrapped on powerpc64-suse-linux, tested on powerpc64-suse-linux
>> and arm-linux-gnueabi
>> OK for mainline?
>
> Sorry I missed this patch. Is there any reason why we need unspecs in
> this case ? Can't this be represented by subregs and zero/ sign
> extensions in RTL without the UNSPECs ?

Like this:

Index: config/arm/neon.md
===
--- config/arm/neon.md  (revision 178942)
+++ config/arm/neon.md  (working copy)
@@ -5550,6 +5550,46 @@
  }
 )

+(define_insn "neon_vec_shiftl_"
+ [(set (match_operand: 0 "register_operand" "=w")
+   (SE: (match_operand:VW 1 "register_operand" "w")))
+   (match_operand:SI 2 "immediate_operand" "i")]
+  "TARGET_NEON"
+{
+  /* The boundaries are: 0 < imm <= size.  */
+  neon_const_bounds (operands[2], 0, neon_element_bits (mode) + 1);
+  return "vshll. %q0, %P1, %2";
+}
+  [(set_attr "neon_type" "neon_shift_1")]
+)
+
+(define_expand "vec_widen_shiftl_lo_"
+  [(match_operand: 0 "register_operand" "")
+   (SE: (match_operand:VU 1 "register_operand" ""))
+   (match_operand:SI 2 "immediate_operand" "i")]
+ "TARGET_NEON && !BYTES_BIG_ENDIAN"
+ {
+  emit_insn (gen_neon_vec_shiftl_ (operands[0],
+   simplify_gen_subreg (mode, operands[1], mode, 0),
+   operands[2]));
+   DONE;
+ }
+)
+
+(define_expand "vec_widen_shiftl_hi_"
+  [(match_operand: 0 "register_operand" "")
+   (SE: (match_operand:VU 1 "register_operand" ""))
+   (match_operand:SI 2 "immediate_operand" "i")]
+ "TARGET_NEON && !BYTES_BIG_ENDIAN"
+ {
+  emit_insn (gen_neon_vec_shiftl_ (operands[0],
+simplify_gen_subreg (mode, operands[1], mode,
+GET_MODE_SIZE (mode)),
+operands[2]));
+   DONE;
+ }
+)
+
 ;; Vectorize for non-neon-quad case
 (define_insn "neon_unpack_"
  [(set (match_operand: 0 "register_operand" "=w")
@@ -5626,6 +5666,34 @@
  }
 )

+(define_expand "vec_widen_shiftl_hi_"
+ [(match_operand: 0 "register_operand" "")
+   (SE: (match_operand:VDI 1 "register_operand" ""))
+   (match_operand:SI 2 "immediate_operand" "i")]
+ "TARGET_NEON"
+ {
+   rtx tmpreg = gen_reg_rtx (mode);
+   emit_insn (gen_neon_vec_shiftl_ (tmpreg, operands[1],
operands[2]));
+   emit_insn (gen_neon_vget_high (operands[0], tmpreg));
+
+   DONE;
+ }
+)
+
+(define_expand "vec_widen_shiftl_lo_"
+  [(match_operand: 0 "register_operand" "")
+   (SE: (match_operand:VDI 1 "register_operand" ""))
+   (match_operand:SI 2 "immediate_operand" "i")]
+ "TARGET_NEON"
+ {
+   rtx tmpreg = gen_reg_rtx (mode);
+   emit_insn (gen_neon_vec_shiftl_ (tmpreg, operands[1],
operands[2]));
+   emit_insn (gen_neon_vget_low (operands[0], tmpreg));
+
+   DONE;
+ }
+)
+
 ; FIXME: These instruction patterns can't be used safely in big-endian mode
 ; because the ordering of vector elements in Q registers is different from what
 ; the semantics of the instructions require.

?

Thanks,
Ira


>
> cheers
> Ramana
>
>>
>> Thanks,
>> Ira
>>
>> ChangeLog:
>>
>>        * doc/md.texi (vec_widen_ushiftl_hi, vec_widen_ushiftl_lo,
>> vec_widen_sshiftl_hi,
>>        vec_widen_sshiftl_lo): Document.
>>        * tree-pretty-print.c (dump_generic_node): Handle 
>> WIDEN_SHIFT_LEFT_EXPR,
>>        VEC_WIDEN_SHIFT_LEFT_HI_EXPR and VEC_WIDEN_SHIFT_LEFT_LO_EXPR.
>>        (op_code_prio): Likewise.
>>        (op_symbol_code): Handle WIDEN_SHIFT_LEFT_EXPR.
>>        * optabs.c (optab_for_tree_code): Handle
>>        VEC_WIDEN_SHIFT_LEFT_HI_EXPR and VEC_WIDEN_SHIFT_LEFT_LO_EXPR.
>>        (init-optabs): Initialize optab codes for vec_widen_u/sshiftl_hi/lo.
>>        * optabs.h (enum optab_index): Add OTI_vec_widen_u/sshiftl_hi/lo.
>>        * genopinit.c (optabs): Initialize the new optabs.
>>        * expr.c (expand_expr_real_2): Handle
>>        VEC_WIDEN_SHIFT_LEFT_HI_EXPR and VEC_WIDEN_SHIFT_LEFT_LO_EXPR.
>>        * gimple-pretty-print.c (dump_binary_rhs): Likewise.
>>        * tree-vectorizer.h (NUM_PATTERNS): Increase to 6.
>>        * tree.def (WIDEN_SHIFT_LEFT_EXPR, VEC_WIDEN_SHIFT_LEFT_HI_EXPR,
>>        VEC_WIDEN_SHIFT_LEFT_LO_EXPR): New.
>>        * cfgexpand.c (expand_debug_expr):  Handle new tree codes.
>>  

[patch][committed] Fix check_effective_target_vect_multiple_sizes and check_effective_target_vect64

2011-10-04 Thread Ira Rosen
Hi,

Michael pointed out this problem in
check_effective_target_vect_multiple_sizes and
check_effective_target_vect64 that I added recently.

Tested on powerpc64-suse-linux.
Committed as obvious.

Thanks,
Ira

testsuite/ChangeLog:

* lib/target-supports.exp (check_effective_target_vect_multiple_sizes):
Make et_vect_multiple_sizes_saved global.
(check_effective_target_vect64): Make et_vect64_saved global.


Index: testsuite/lib/target-supports.exp
===
--- testsuite/lib/target-supports.exp   (revision 179489)
+++ testsuite/lib/target-supports.exp   (working copy)
@@ -3375,7 +3375,7 @@ foreach N {2 3 4 8} {
 # Return 1 if the target supports multiple vector sizes

 proc check_effective_target_vect_multiple_sizes { } {
-global et_vect_multiple_sizes
+global et_vect_multiple_sizes_saved

 if [info exists et_vect_multiple_sizes_saved] {
 verbose "check_effective_target_vect_multiple_sizes: using
cached result" 2
@@ -3393,7 +3393,7 @@ proc check_effective_target_vect_multiple_sizes {
 # Return 1 if the target supports vectors of 64 bits.

 proc check_effective_target_vect64 { } {
-global et_vect64
+global et_vect64_saved

 if [info exists et_vect64_saved] {
 verbose "check_effective_target_vect64: using cached result" 2


Re: [PATCH] Fix memory leak in vect_pattern_recog_1

2011-10-05 Thread Ira Rosen
On 5 October 2011 20:06, Jakub Jelinek  wrote:
> Hi!
>
> If vect_recog_func fails (or the other spot where vect_pattern_recog_1
> returns early), the vector allocated in the function isn't freed, leading
> to memory leak.  But, more importantly, doing a VEC_alloc + VEC_free
> num_stmts_in_loop * NUM_PATTERNS times seems to be completely unnecessary,
> the following patch allocates just one vector for that purpose in the caller
> and only performs VEC_truncate in each call to make it independent from
> previous uses of the vector.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

OK.

Thanks,
Ira

>
> 2011-10-05  Jakub Jelinek  
>
>        * tree-vect-patterns.c (vect_pattern_recog_1): Add stmts_to_replace
>        argument, truncate it at the beginning instead of allocating there
>        and freeing at the end.
>        (vect_pattern_recog): Allocate stmts_to_replace here and free at end,
>        pass its address to vect_pattern_recog_1.
>
> --- gcc/tree-vect-patterns.c.jj 2011-09-26 14:06:52.0 +0200
> +++ gcc/tree-vect-patterns.c    2011-10-05 15:57:38.0 +0200
> @@ -1281,7 +1281,8 @@ vect_mark_pattern_stmts (gimple orig_stm
>  static void
>  vect_pattern_recog_1 (
>        gimple (* vect_recog_func) (VEC (gimple, heap) **, tree *, tree *),
> -       gimple_stmt_iterator si)
> +       gimple_stmt_iterator si,
> +       VEC (gimple, heap) **stmts_to_replace)
>  {
>   gimple stmt = gsi_stmt (si), pattern_stmt;
>   stmt_vec_info stmt_info;
> @@ -1291,14 +1292,14 @@ vect_pattern_recog_1 (
>   enum tree_code code;
>   int i;
>   gimple next;
> -  VEC (gimple, heap) *stmts_to_replace = VEC_alloc (gimple, heap, 1);
>
> -  VEC_quick_push (gimple, stmts_to_replace, stmt);
> -  pattern_stmt = (* vect_recog_func) (&stmts_to_replace, &type_in, 
> &type_out);
> +  VEC_truncate (gimple, *stmts_to_replace, 0);
> +  VEC_quick_push (gimple, *stmts_to_replace, stmt);
> +  pattern_stmt = (* vect_recog_func) (stmts_to_replace, &type_in, &type_out);
>   if (!pattern_stmt)
>     return;
>
> -  stmt = VEC_last (gimple, stmts_to_replace);
> +  stmt = VEC_last (gimple, *stmts_to_replace);
>   stmt_info = vinfo_for_stmt (stmt);
>   loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
>
> @@ -1363,8 +1364,8 @@ vect_pattern_recog_1 (
>   /* It is possible that additional pattern stmts are created and inserted in
>      STMTS_TO_REPLACE.  We create a stmt_info for each of them, and mark the
>      relevant statements.  */
> -  for (i = 0; VEC_iterate (gimple, stmts_to_replace, i, stmt)
> -              && (unsigned) i < (VEC_length (gimple, stmts_to_replace) - 1);
> +  for (i = 0; VEC_iterate (gimple, *stmts_to_replace, i, stmt)
> +             && (unsigned) i < (VEC_length (gimple, *stmts_to_replace) - 1);
>        i++)
>     {
>       stmt_info = vinfo_for_stmt (stmt);
> @@ -1377,8 +1378,6 @@ vect_pattern_recog_1 (
>
>       vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
>     }
> -
> -  VEC_free (gimple, heap, stmts_to_replace);
>  }
>
>
> @@ -1468,6 +1467,7 @@ vect_pattern_recog (loop_vec_info loop_v
>   gimple_stmt_iterator si;
>   unsigned int i, j;
>   gimple (* vect_recog_func_ptr) (VEC (gimple, heap) **, tree *, tree *);
> +  VEC (gimple, heap) *stmts_to_replace = VEC_alloc (gimple, heap, 1);
>
>   if (vect_print_dump_info (REPORT_DETAILS))
>     fprintf (vect_dump, "=== vect_pattern_recog ===");
> @@ -1483,8 +1483,11 @@ vect_pattern_recog (loop_vec_info loop_v
>           for (j = 0; j < NUM_PATTERNS; j++)
>             {
>               vect_recog_func_ptr = vect_vect_recog_func_ptrs[j];
> -              vect_pattern_recog_1 (vect_recog_func_ptr, si);
> +             vect_pattern_recog_1 (vect_recog_func_ptr, si,
> +                                   &stmts_to_replace);
>             }
>         }
>     }
> +
> +  VEC_free (gimple, heap, stmts_to_replace);
>  }
>
>        Jakub
>


Re: [PATCH] Optimize COND_EXPR where then/else operands are INTEGER_CSTs of different size than the comparison operands

2011-10-06 Thread Ira Rosen
On 6 October 2011 18:17, Jakub Jelinek  wrote:
> Hi!
>
> Since Richard's changes recently to allow different modes in vcond
> patterns (so far on i?86/x86_64 only I think) we can vectorize more
> COND_EXPRs than before, and this patch improves it a tiny bit more
> - even i?86/x86_64 support vconds only if the sizes of vector element
> modes are the same.  With this patch we can optimize even if it is wider
> or narrower, by vectorizing it as the COND_EXPR in integer mode matching
> the size of the comparsion operands and then a cast.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
>

OK, but...

> --- gcc/tree-vect-stmts.c.jj    2011-09-29 14:25:46.0 +0200
> +++ gcc/tree-vect-stmts.c       2011-10-06 12:16:43.0 +0200
> @@ -652,9 +652,26 @@ vect_mark_stmts_to_be_vectorized (loop_v
>              have to scan the RHS or function arguments instead.  */
>           if (is_gimple_assign (stmt))
>             {
> -              for (i = 1; i < gimple_num_ops (stmt); i++)
> +             enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
> +             tree op = gimple_assign_rhs1 (stmt);
> +
> +             i = 1;
> +             if ((rhs_code == COND_EXPR || rhs_code == VEC_COND_EXPR)

I don't understand why we need VEC_COND_EXPR here.

> +                 && COMPARISON_CLASS_P (op))
> +               {
> +                 if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
> +                                   live_p, relevant, &worklist)
> +                     || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
> +                                      live_p, relevant, &worklist))
> +                   {
> +                     VEC_free (gimple, heap, worklist);
> +                     return false;
> +                   }
> +                 i = 2;
> +               }
> +             for (; i < gimple_num_ops (stmt); i++)
>                 {
> -                  tree op = gimple_op (stmt, i);
> +                 op = gimple_op (stmt, i);
>                   if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
>                                     &worklist))
>                     {
>

Thanks,
Ira


Re: [PATCH] Minor readability improvement in vect_pattern_recog{,_1}

2011-10-06 Thread Ira Rosen
On 6 October 2011 18:19, Jakub Jelinek  wrote:
> Hi!
>
> tree-vectorizer.h already has typedefs for the recog functions,
> and using that typedef we can make these two functions slightly more
> readable.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

OK.

Thanks,
Ira

>
> 2011-10-06  Jakub Jelinek  
>
>        * tree-vect-patterns.c (vect_pattern_recog_1): Use
>        vect_recog_func_ptr typedef for the first argument.
>        (vect_pattern_recog): Rename vect_recog_func_ptr variable
>        to vect_recog_func, use vect_recog_func_ptr typedef for it.
>
> --- gcc/tree-vect-patterns.c.jj 2011-10-06 14:37:12.0 +0200
> +++ gcc/tree-vect-patterns.c    2011-10-06 15:50:12.0 +0200
> @@ -1393,10 +1393,9 @@ vect_mark_pattern_stmts (gimple orig_stm
>    for vect_recog_pattern.  */
>
>  static void
> -vect_pattern_recog_1 (
> -       gimple (* vect_recog_func) (VEC (gimple, heap) **, tree *, tree *),
> -       gimple_stmt_iterator si,
> -       VEC (gimple, heap) **stmts_to_replace)
> +vect_pattern_recog_1 (vect_recog_func_ptr vect_recog_func,
> +                     gimple_stmt_iterator si,
> +                     VEC (gimple, heap) **stmts_to_replace)
>  {
>   gimple stmt = gsi_stmt (si), pattern_stmt;
>   stmt_vec_info stmt_info;
> @@ -1580,7 +1579,7 @@ vect_pattern_recog (loop_vec_info loop_v
>   unsigned int nbbs = loop->num_nodes;
>   gimple_stmt_iterator si;
>   unsigned int i, j;
> -  gimple (* vect_recog_func_ptr) (VEC (gimple, heap) **, tree *, tree *);
> +  vect_recog_func_ptr vect_recog_func;
>   VEC (gimple, heap) *stmts_to_replace = VEC_alloc (gimple, heap, 1);
>
>   if (vect_print_dump_info (REPORT_DETAILS))
> @@ -1596,8 +1595,8 @@ vect_pattern_recog (loop_vec_info loop_v
>           /* Scan over all generic vect_recog_xxx_pattern functions.  */
>           for (j = 0; j < NUM_PATTERNS; j++)
>             {
> -              vect_recog_func_ptr = vect_vect_recog_func_ptrs[j];
> -             vect_pattern_recog_1 (vect_recog_func_ptr, si,
> +             vect_recog_func = vect_vect_recog_func_ptrs[j];
> +             vect_pattern_recog_1 (vect_recog_func, si,
>                                    &stmts_to_replace);
>             }
>         }
>
>        Jakub
>


Re: [PATCH] Optimize COND_EXPR where then/else operands are INTEGER_CSTs of different size than the comparison operands

2011-10-06 Thread Ira Rosen
On 6 October 2011 19:28, Jakub Jelinek  wrote:
> On Thu, Oct 06, 2011 at 07:27:28PM +0200, Ira Rosen wrote:
>> > +             i = 1;
>> > +             if ((rhs_code == COND_EXPR || rhs_code == VEC_COND_EXPR)
>>
>> I don't understand why we need VEC_COND_EXPR here.
>
> Only for completeness, as VEC_COND_EXPR is the same weirdo thingie like
> COND_EXPR.  I can leave that out if you want.

But we mark stmts that we want to vectorize here. I think that
expecting a vector stmt is confusing. So yes, please, leave it out.

Thanks,
Ira

>
>        Jakub
>


[patch] Fix PR tree-optimization/50635

2011-10-09 Thread Ira Rosen
Hi,

In vectorizer pattern recognition when a pattern def_stmt already
exists, we need to mark it properly for the current pattern.
Another problem is that we don't really have to check that TYPE_OUT is
a vector type. It is set by the pattern detection procedures, and if
the type is invalid we fail later in the operation analysis anyway.

Bootstrapped and tested on powerpc64-suse-linux.
Committed.

Ira

ChangeLog:

PR tree-optimization/50635
* tree-vect-patterns.c (vect_handle_widen_mult_by_const): Add
DEF_STMT to the list of statements to be replaced by the
pattern statements.
(vect_handle_widen_mult_by_const): Don't check TYPE_OUT.

testsuite/ChangeLog:

PR tree-optimization/50635
* gcc.dg/vect/pr50635.c: New test.


Index: testsuite/gcc.dg/vect/pr50635.c
===
--- testsuite/gcc.dg/vect/pr50635.c (revision 0)
+++ testsuite/gcc.dg/vect/pr50635.c (revision 0)
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+
+typedef signed long int32_t;
+typedef char int8_t;
+
+void f0a(int32_t * result, int32_t * arg1, int8_t * arg2, int32_t temp_3)
+{
+  int idx;
+  for (idx=0;idx<10;idx += 1)
+{
+  int32_t temp_4;
+  int32_t temp_12;
+
+  temp_4 = (-2 & arg2[idx]) + temp_3;
+  temp_12 = -2 * arg2[idx] + temp_4;
+  result[idx] = temp_12;
+}
+}
+
+/* { dg-final { cleanup-tree-dump "vect" } } */
+
Index: tree-vect-patterns.c
===
--- tree-vect-patterns.c(revision 179718)
+++ tree-vect-patterns.c(working copy)
@@ -388,6 +388,7 @@ vect_handle_widen_mult_by_const (gimple stmt, tree
   || TREE_TYPE (gimple_assign_lhs (new_stmt)) != new_type)
 return false;

+  VEC_safe_push (gimple, heap, *stmts, def_stmt);
   *oprnd = gimple_assign_lhs (new_stmt);
 }
   else
@@ -1424,8 +1425,6 @@ vect_pattern_recog_1 (vect_recog_func_ptr vect_rec
 {
   /* No need to check target support (already checked by the pattern
  recognition function).  */
-  if (type_out)
-   gcc_assert (VECTOR_MODE_P (TYPE_MODE (type_out)));
   pattern_vectype = type_out ? type_out : type_in;
 }
   else


[patch] Fix PR bootstrap/51112

2011-11-13 Thread Ira Rosen
Hi,

This patch fixes a maybe-uninitialized error.

Bootstrapped and tested on powerpc64-suse-linux.
Committed.

Ira

ChangeLog:

PR bootstrap/51112
* tree-vect-stmts.c (vectorizable_condition): Initialize comp_vectype.

Index: tree-vect-stmts.c
===
--- tree-vect-stmts.c   (revision 181345)
+++ tree-vect-stmts.c   (working copy)
@@ -4968,7 +4968,7 @@ vectorizable_condition (gimple stmt, gimple_stmt_i
   tree cond_expr, then_clause, else_clause;
   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
-  tree comp_vectype;
+  tree comp_vectype = NULL_TREE;
   tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
   tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
   tree vec_compare, vec_cond_expr;


Re: [PATCH][PING] Vectorize conversions directly

2011-11-26 Thread Ira Rosen


gcc-patches-ow...@gcc.gnu.org wrote on 22/11/2011 03:31:22 PM:

> From: Ramana Radhakrishnan 
> > gcc/testsuite/lib/
> >    * target-supports.exp (check_effective_target_vect_intfloat_cvt):
True
> >      for ARM NEON.
> >      (check_effective_target_vect_uintfloat_cvt): Likewise.
> >      (check_effective_target_vect_intfloat_cvt): Likewise.
> >      (check_effective_target_vect_floatuint_cvt): Likewise.
> >      (check_effective_target_vect_floatint_cvt): Likewise.
> >      (check_effective_target_vect_extract_even_odd): Likewise.
>
> I'm not sure about enabling the vect_extract_even_odd case. If this
> assumes the presence of an extract-even-odd from registers type
> operation, then the Neon port doesn't really support vec_extract_even
> / vec_extract_odd forms -  You do have them in one single instruction
> if you tried to load them from / or store them to memory which is the
> vld2 / vst2 instruction while the register form of vuzp which reads
> and writes to both source operands is not really supported directly
> from the backend.

Right.
Dmitry, you can do this instead:

Index: fast-math-pr35982.c
===
--- fast-math-pr35982.c (revision 181150)
+++ fast-math-pr35982.c (working copy)
@@ -20,7 +20,7 @@
   return avg;
 }

-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1
"vect" { target vect_extract_even_odd  } } } */
-/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { xfail
vect_extract_even_odd  } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1
"vect" { target { vect_extract_even_odd || vect_strided2 } } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { xfail
{ vect_extract_even_odd  || vect_strided2 } } } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */

Ira

>
> The other testsuite changes look OK to me.
>
> cheers
> Ramana
>
> >
>



[patch] Fix PR tree-optimization/51301

2011-11-28 Thread Ira Rosen
Hi,

In vectorizer's over-widening pattern recognition the last statement
is expected to be a type demotion, but the check for that was
incomplete. We now check that the resulting type is not bigger than
the original type of the computation.

Bootstrapped and tested on powerpc64-suse-linux, tested on
arm-linux-gnueabi (cross).
Committed.

Ira

ChangeLog:

PR tree-optimization/51301
* tree-vect-patterns.c (vect_recog_over_widening_pattern): Check that
the last statement doesn't convert to a bigger type than the original
type of the computation.

testsuite/ChangeLog:

PR tree-optimization/51301
* gcc.dg/vect/pr51301.c: New test.

Index: tree-vect-patterns.c
===
--- tree-vect-patterns.c(revision 181796)
+++ tree-vect-patterns.c(working copy)
@@ -1088,6 +1088,7 @@ vect_recog_over_widening_pattern (VEC (gimple, hea
   tree var = NULL_TREE, new_type = NULL_TREE, tmp, new_oprnd;
   bool first;
   struct loop *loop = (gimple_bb (stmt))->loop_father;
+  tree type = NULL;

   first = true;
   while (1)
@@ -1150,6 +1151,7 @@ vect_recog_over_widening_pattern (VEC (gimple, hea
   print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
 }

+  type = gimple_expr_type (stmt);
   prev_stmt = stmt;
   stmt = use_stmt;

@@ -1165,9 +1167,11 @@ vect_recog_over_widening_pattern (VEC (gimple, hea
 {
   use_lhs = gimple_assign_lhs (use_stmt);
   use_type = TREE_TYPE (use_lhs);
-  /* Support only type promotion or signedess change.  */
+  /* Support only type promotion or signedess change.  Check that USE_TYPE
+is not bigger than the original type.  */
   if (!INTEGRAL_TYPE_P (use_type)
-  || TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type))
+  || TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type)
+ || TYPE_PRECISION (type) < TYPE_PRECISION (use_type))
 return NULL;

   if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
Index: testsuite/gcc.dg/vect/pr51301.c
===
--- testsuite/gcc.dg/vect/pr51301.c (revision 0)
+++ testsuite/gcc.dg/vect/pr51301.c (revision 0)
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+
+typedef signed char int8_t;
+typedef signed long long int64_t;
+int64_t
+f0a (int8_t * __restrict__ arg1)
+{
+  int idx;
+  int64_t result = 0;
+  for (idx = 0; idx < 416; idx += 1)
+result += arg1[idx] << (arg1[idx] == arg1[idx]);
+  return result;
+}
+
+/* { dg-final { cleanup-tree-dump "vect" } } */


Re: [PATCH] Don't change DR_STMT if vect_pattern_recog_1 would fail (PR tree-optimization/51356)

2011-12-01 Thread Ira Rosen
On 1 December 2011 18:41, Jakub Jelinek  wrote:
> Hi!

Hi,

>
> As mentioned in the PR, vect_pattern_recog_1 attempts to find out
> if the computed type_in and type_out are already vector types or not,
> and uses VECTOR_MODE_P (TYPE_MODE (type_in)) as the test.  Unfortunately,
> get_vectype_for_scalar_type on some targets (e.g. PowerPC) returns a
> VECTOR_TYPE with TImode for a DImode integer/boolean scalar type.
> If that happens, vect_recog_bool_pattern assumes it will succeed and changes
> DR_STMT, but vect_mark_pattern_stmts isn't called and we ICE later on.
> Not sure what actually can be vectorized using scalar mode vectors,
> so either we adjust vect_recog_bool_pattern like this, or perhaps
> vect_pattern_recog_1 could use a different test (TREE_CODE (type_in) ==
> VECTOR_TYPE)?

But AFAIU in the latter case we would fail to vectorize anyway, so I am
OK with your patch.

Thanks,
Ira

>
> This has been bootstrapped/regtested on x86_64-linux and i686-linux
> and fixes the testcase on PowerPC.
>
> 2011-12-01  Jakub Jelinek  
>
>        PR tree-optimization/51356
>        * tree-vect-patterns.c (vect_recog_bool_pattern): Give up if
>        vectype doesn't have VECTOR_MODE_P.
>
> --- gcc/tree-vect-patterns.c.jj 2011-11-29 15:09:18.0 +0100
> +++ gcc/tree-vect-patterns.c    2011-11-30 17:57:42.183149742 +0100
> @@ -2078,6 +2078,8 @@ vect_recog_bool_pattern (VEC (gimple, he
>       stmt_vec_info pattern_stmt_info;
>       vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
>       gcc_assert (vectype != NULL_TREE);
> +      if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
> +       return NULL;
>       if (!check_bool_pattern (var, loop_vinfo))
>        return NULL;
>
>
>        Jakub
>


Re: [Patch] Increase array sizes in vect-tests to enable 256-bit vectorization

2011-12-02 Thread Ira Rosen

gcc-patches-ow...@gcc.gnu.org wrote on 02/12/2011 06:23:25 PM:

> Hi,
>
> This patch increases array sizes in tests from vect.exp suite, thus
> enabling 256-bit vectorization where it's available.
>
> Ok for trunk?

--- a/gcc/testsuite/gcc.dg/vect/slp-24.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-24.c
...
@@ -13,14 +12,17 @@ typedef struct {
unsigned char d;
 } s;

-unsigned char ub[N*2] =
{1,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45,1,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
-unsigned char uc[N] = {1,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
+unsigned char ub[N*2];
+unsigned char uc[N];
+
+volatile int y = 0;
+unsigned char check_diff = 0;

 void
 main1 (unsigned char x, unsigned char max_result, unsigned char
min_result, s *arr)
 {
   int i;
-  unsigned char udiff = 2;
+  unsigned char udiff = 0;

Please don't change initial values to 0, we want to check that everything
works fine for non-zeros as well.
There are several other occasions in the patch.

Thanks,
Ira

>
> Changelog:
> 2011-12-02  Michael Zolotukhin  
>
>* gcc.dg/vect/slp-13.c: Increase array size, add initialization.
>* gcc.dg/vect/slp-24.c: Ditto.
>* gcc.dg/vect/slp-3.c: Likewise and fix scans.
>* gcc.dg/vect/slp-34.c: Ditto.
>* gcc.dg/vect/slp-4.c: Ditto.
>* gcc.dg/vect/slp-cond-2.c: Ditto.
>* gcc.dg/vect/slp-multitypes-11.c: Ditto.
>* gcc.dg/vect/vect-1.c: Ditto.
>* gcc.dg/vect/vect-10.c: Ditto.
>* gcc.dg/vect/vect-105.c: Ditto.
>* gcc.dg/vect/vect-112.c: Ditto.
>* gcc.dg/vect/vect-15.c: Ditto.
>* gcc.dg/vect/vect-2.c: Ditto.
>* gcc.dg/vect/vect-31.c: Ditto.
>* gcc.dg/vect/vect-32.c: Ditto.
>* gcc.dg/vect/vect-33.c: Ditto.
>* gcc.dg/vect/vect-34.c: Ditto.
>* gcc.dg/vect/vect-35.c: Ditto.
>* gcc.dg/vect/vect-36.c: Ditto.
>* gcc.dg/vect/vect-6.c: Ditto.
>* gcc.dg/vect/vect-73.c: Ditto.
>* gcc.dg/vect/vect-74.c: Ditto.
>* gcc.dg/vect/vect-75.c: Ditto.
>* gcc.dg/vect/vect-76.c: Ditto.
>* gcc.dg/vect/vect-80.c: Ditto.
>* gcc.dg/vect/vect-85.c: Ditto.
>* gcc.dg/vect/vect-89.c: Ditto.
>* gcc.dg/vect/vect-97.c: Ditto.
>* gcc.dg/vect/vect-98.c: Ditto.
>* gcc.dg/vect/vect-all.c: Ditto.
>* gcc.dg/vect/vect-double-reduc-6.c: Ditto.
>* gcc.dg/vect/vect-iv-8.c: Ditto.
>* gcc.dg/vect/vect-iv-8a.c: Ditto.
>* gcc.dg/vect/vect-outer-1.c: Ditto.
>* gcc.dg/vect/vect-outer-1a.c: Ditto.
>* gcc.dg/vect/vect-outer-1b.c: Ditto.
>* gcc.dg/vect/vect-outer-2.c: Ditto.
>* gcc.dg/vect/vect-outer-2a.c: Ditto.
>* gcc.dg/vect/vect-outer-2c.c: Ditto.
>* gcc.dg/vect/vect-outer-3.c: Ditto.
>* gcc.dg/vect/vect-outer-3a.c: Ditto.
>* gcc.dg/vect/vect-outer-4a.c: Ditto.
>* gcc.dg/vect/vect-outer-4b.c: Ditto.
>* gcc.dg/vect/vect-outer-4c.c: Ditto.
>* gcc.dg/vect/vect-outer-4d.c: Ditto.
>* gcc.dg/vect/vect-outer-4m.c: Ditto.
>* gcc.dg/vect/vect-outer-fir-lb.c: Ditto.
>* gcc.dg/vect/vect-outer-fir.c: Ditto.
>* gcc.dg/vect/vect-over-widen-1.c: Ditto.
>* gcc.dg/vect/vect-over-widen-2.c: Ditto.
>* gcc.dg/vect/vect-over-widen-3.c: Ditto.
>* gcc.dg/vect/vect-over-widen-4.c: Ditto.
>* gcc.dg/vect/vect-reduc-1char.c: Ditto.
>* gcc.dg/vect/vect-reduc-2char.c: Ditto.
>* gcc.dg/vect/vect-reduc-pattern-1b.c: Ditto.
>* gcc.dg/vect/vect-reduc-pattern-1c.c: Ditto.
>* gcc.dg/vect/vect-reduc-pattern-2b.c: Ditto.
>* gcc.dg/vect/vect-shift-2.c: Ditto.
>* gcc.dg/vect/vect-strided-a-u8-i8-gap2.c: Ditto.
>* gcc.dg/vect/vect-strided-a-u8-i8-gap7.c: Ditto.
>* gcc.dg/vect/vect-strided-u8-i8-gap2.c: Ditto.
>* gcc.dg/vect/vect-strided-u8-i8-gap4.c: Ditto.
>* gcc.dg/vect/vect-strided-u8-i8-gap7.c: Ditto.
>
> --
> ---
> Best regards,
> Michael V. Zolotukhin,
> Software Engineer
> Intel Corporation.
> [attachment "vect_tests.patch" deleted by Ira Rosen/Haifa/IBM]



Re: [Patch] Increase array sizes in vect-tests to enable 256-bit vectorization

2011-12-02 Thread Ira Rosen


Michael Zolotukhin  wrote on 02/12/2011
08:11:41 PM:
>
> > Please don't change initial values to 0, we want to check that
everything
> > works fine for non-zeros as well.
> > There are several other occasions in the patch.
>
> Please check the update patch (attached).

This is ok with me.

Thanks,
Ira

>
> On 2 December 2011 20:49, Ira Rosen  wrote:
> >
> > gcc-patches-ow...@gcc.gnu.org wrote on 02/12/2011 06:23:25 PM:
> >
> >> Hi,
> >>
> >> This patch increases array sizes in tests from vect.exp suite, thus
> >> enabling 256-bit vectorization where it's available.
> >>
> >> Ok for trunk?
> >
> > --- a/gcc/testsuite/gcc.dg/vect/slp-24.c
> > +++ b/gcc/testsuite/gcc.dg/vect/slp-24.c
> > ...
> > @@ -13,14 +12,17 @@ typedef struct {
> >    unsigned char d;
> >  } s;
> >
> > -unsigned char ub[N*2] =
> > {1,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45,1,3,6,9,12,15,18,21,
> 24,27,30,33,36,39,42,45};
> > -unsigned char uc[N] = {1,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
> > +unsigned char ub[N*2];
> > +unsigned char uc[N];
> > +
> > +volatile int y = 0;
> > +unsigned char check_diff = 0;
> >
> >  void
> >  main1 (unsigned char x, unsigned char max_result, unsigned char
> > min_result, s *arr)
> >  {
> >   int i;
> > -  unsigned char udiff = 2;
> > +  unsigned char udiff = 0;
> >
> > Please don't change initial values to 0, we want to check that
everything
> > works fine for non-zeros as well.
> > There are several other occasions in the patch.
> >
> > Thanks,
> > Ira
> >
> >>
> >> Changelog:
> >> 2011-12-02  Michael Zolotukhin  
> >>
> >>    * gcc.dg/vect/slp-13.c: Increase array size, add initialization.
> >>    * gcc.dg/vect/slp-24.c: Ditto.
> >>    * gcc.dg/vect/slp-3.c: Likewise and fix scans.
> >>    * gcc.dg/vect/slp-34.c: Ditto.
> >>    * gcc.dg/vect/slp-4.c: Ditto.
> >>    * gcc.dg/vect/slp-cond-2.c: Ditto.
> >>    * gcc.dg/vect/slp-multitypes-11.c: Ditto.
> >>    * gcc.dg/vect/vect-1.c: Ditto.
> >>    * gcc.dg/vect/vect-10.c: Ditto.
> >>    * gcc.dg/vect/vect-105.c: Ditto.
> >>    * gcc.dg/vect/vect-112.c: Ditto.
> >>    * gcc.dg/vect/vect-15.c: Ditto.
> >>    * gcc.dg/vect/vect-2.c: Ditto.
> >>    * gcc.dg/vect/vect-31.c: Ditto.
> >>    * gcc.dg/vect/vect-32.c: Ditto.
> >>    * gcc.dg/vect/vect-33.c: Ditto.
> >>    * gcc.dg/vect/vect-34.c: Ditto.
> >>    * gcc.dg/vect/vect-35.c: Ditto.
> >>    * gcc.dg/vect/vect-36.c: Ditto.
> >>    * gcc.dg/vect/vect-6.c: Ditto.
> >>    * gcc.dg/vect/vect-73.c: Ditto.
> >>    * gcc.dg/vect/vect-74.c: Ditto.
> >>    * gcc.dg/vect/vect-75.c: Ditto.
> >>    * gcc.dg/vect/vect-76.c: Ditto.
> >>    * gcc.dg/vect/vect-80.c: Ditto.
> >>    * gcc.dg/vect/vect-85.c: Ditto.
> >>    * gcc.dg/vect/vect-89.c: Ditto.
> >>    * gcc.dg/vect/vect-97.c: Ditto.
> >>    * gcc.dg/vect/vect-98.c: Ditto.
> >>    * gcc.dg/vect/vect-all.c: Ditto.
> >>    * gcc.dg/vect/vect-double-reduc-6.c: Ditto.
> >>    * gcc.dg/vect/vect-iv-8.c: Ditto.
> >>    * gcc.dg/vect/vect-iv-8a.c: Ditto.
> >>    * gcc.dg/vect/vect-outer-1.c: Ditto.
> >>    * gcc.dg/vect/vect-outer-1a.c: Ditto.
> >>    * gcc.dg/vect/vect-outer-1b.c: Ditto.
> >>    * gcc.dg/vect/vect-outer-2.c: Ditto.
> >>    * gcc.dg/vect/vect-outer-2a.c: Ditto.
> >>    * gcc.dg/vect/vect-outer-2c.c: Ditto.
> >>    * gcc.dg/vect/vect-outer-3.c: Ditto.
> >>    * gcc.dg/vect/vect-outer-3a.c: Ditto.
> >>    * gcc.dg/vect/vect-outer-4a.c: Ditto.
> >>    * gcc.dg/vect/vect-outer-4b.c: Ditto.
> >>    * gcc.dg/vect/vect-outer-4c.c: Ditto.
> >>    * gcc.dg/vect/vect-outer-4d.c: Ditto.
> >>    * gcc.dg/vect/vect-outer-4m.c: Ditto.
> >>    * gcc.dg/vect/vect-outer-fir-lb.c: Ditto.
> >>    * gcc.dg/vect/vect-outer-fir.c: Ditto.
> >>    * gcc.dg/vect/vect-over-widen-1.c: Ditto.
> >>    * gcc.dg/vect/vect-over-widen-2.c: Ditto.
> >>    * gcc.dg/vect/vect-over-widen-3.c: Ditto.
> >>    * gcc.dg/vect/vect-over-widen-4.c: Ditto.
> >>    * gcc.dg/vect/vect-reduc-1char.c: Ditto.
> >>    * gcc.dg/vect/vect-reduc-2char.c: Ditto.
> >>    * gcc.dg/vect/vect-reduc-pattern-1b.c: Ditto.
> >>    * gcc.dg/vect/vect-reduc-pattern-1c.c: Ditto.
> >>    * gcc.dg/vect/vect-reduc-pattern-2b.c: Ditto.
> >>    * gcc.dg/vect/vect-shift-2.c: Ditto.
> >>    * gcc.dg/vect/vect-strided-a-u8-i8-gap2.c: Ditto.
> >>    * gcc.dg/vect/vect-strided-a-u8-i8-gap7.c: Ditto.
> >>    * gcc.dg/vect/vect-strided-u8-i8-gap2.c: Ditto.
> >>    * gcc.dg/vect/vect-strided-u8-i8-gap4.c: Ditto.
> >>    * gcc.dg/vect/vect-strided-u8-i8-gap7.c: Ditto.
> >>
> >> --
> >> ---
> >> Best regards,
> >> Michael V. Zolotukhin,
> >> Software Engineer
> >> Intel Corporation.
> >> [attachment "vect_tests.patch" deleted by Ira Rosen/Haifa/IBM]
> >
>
>
>
> --
> ---
> Best regards,
> Michael V. Zolotukhin,
> Software Engineer
> Intel Corporation.
> [attachment "vect_tests-2.patch" deleted by Ira Rosen/Haifa/IBM]



[patch] Fix exit phi nodes creation for double reduction - PR 51285

2011-12-04 Thread Ira Rosen
Hi,

This patch adds a missing exit phi node for outer loop in
vectorization of double reduction.

Bootstrapped and tested on powerpc64-suse-linux.
Committed.

Ira

ChangeLog:

PR middle-end/51285
* tree-vect-loop.c (vect_create_epilog_for_reduction): Create exit
phi nodes for outer loop in case of double reduction.

testsuite/ChangeLog:

PR middle-end/51285
* gfortran.dg/vect/pr51285.f90: New test.

Index: tree-vect-loop.c
===
--- tree-vect-loop.c(revision 181984)
+++ tree-vect-loop.c(working copy)
@@ -3462,6 +3462,7 @@ vect_create_epilog_for_reduction (VEC (tree, heap)
   gimple use_stmt, orig_stmt, reduction_phi = NULL;
   bool nested_in_vect_loop = false;
   VEC (gimple, heap) *new_phis = NULL;
+  VEC (gimple, heap) *inner_phis = NULL;
   enum vect_def_type dt = vect_unknown_def_type;
   int j, i;
   VEC (tree, heap) *scalar_results = NULL;
@@ -3470,6 +3471,7 @@ vect_create_epilog_for_reduction (VEC (tree, heap)
   VEC (gimple, heap) *phis;
   bool slp_reduc = false;
   tree new_phi_result;
+  gimple inner_phi = NULL;

   if (slp_node)
 group_size = VEC_length (gimple, SLP_TREE_SCALAR_STMTS (slp_node));
@@ -3626,11 +3628,36 @@ vect_create_epilog_for_reduction (VEC (tree, heap)
 }

   /* The epilogue is created for the outer-loop, i.e., for the loop being
- vectorized.  */
+ vectorized.  Create exit phis for the outer loop.  */
   if (double_reduc)
 {
   loop = outer_loop;
   exit_bb = single_exit (loop)->dest;
+  inner_phis = VEC_alloc (gimple, heap, VEC_length (tree, vect_defs));
+  FOR_EACH_VEC_ELT (gimple, new_phis, i, phi)
+   {
+ gimple outer_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (phi)),
+ exit_bb);
+ SET_PHI_ARG_DEF (outer_phi, single_exit (loop)->dest_idx,
+  PHI_RESULT (phi));
+ set_vinfo_for_stmt (outer_phi, new_stmt_vec_info (outer_phi,
+   loop_vinfo, NULL));
+ VEC_quick_push (gimple, inner_phis, phi);
+ VEC_replace (gimple, new_phis, i, outer_phi);
+ prev_phi_info = vinfo_for_stmt (outer_phi);
+  while (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi)))
+{
+ phi = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi));
+ outer_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (phi)),
+  exit_bb);
+ SET_PHI_ARG_DEF (outer_phi, single_exit (loop)->dest_idx,
+  PHI_RESULT (phi));
+ set_vinfo_for_stmt (outer_phi, new_stmt_vec_info (outer_phi,
+   loop_vinfo, NULL));
+ STMT_VINFO_RELATED_STMT (prev_phi_info) = outer_phi;
+ prev_phi_info = vinfo_for_stmt (outer_phi);
+   }
+   }
 }

   exit_gsi = gsi_after_labels (exit_bb);
@@ -4040,6 +4067,8 @@ vect_finalize_reduction:
 {
   epilog_stmt = VEC_index (gimple, new_phis, k / ratio);
   reduction_phi = VEC_index (gimple, reduction_phis, k / ratio);
+ if (double_reduc)
+   inner_phi = VEC_index (gimple, inner_phis, k / ratio);
 }

   if (slp_reduc)
@@ -4123,7 +4152,7 @@ vect_finalize_reduction:
  vs1 was created previously in this function by a call to
vect_get_vec_def_for_operand and is stored in
vec_initial_def;
- vs2 is defined by EPILOG_STMT, the vectorized EXIT_PHI;
+ vs2 is defined by INNER_PHI, the vectorized EXIT_PHI;
  vs0 is created here.  */

   /* Create vector phi node.  */
@@ -4144,7 +4173,7 @@ vect_finalize_reduction:
   add_phi_arg (vect_phi, vect_phi_init,
loop_preheader_edge (outer_loop),
UNKNOWN_LOCATION);
-  add_phi_arg (vect_phi, PHI_RESULT (epilog_stmt),
+  add_phi_arg (vect_phi, PHI_RESULT (inner_phi),
loop_latch_edge (outer_loop), UNKNOWN_LOCATION);
   if (vect_print_dump_info (REPORT_DETAILS))
 {
Index: testsuite/gfortran.dg/vect/pr51285.f90
===
--- testsuite/gfortran.dg/vect/pr51285.f90  (revision 0)
+++ testsuite/gfortran.dg/vect/pr51285.f90  (revision 0)
@@ -0,0 +1,36 @@
+! { dg-do compile }
+
+   SUBROUTINE smm_dnn_4_10_10_1_1_2_1(A,B,C)
+  REAL   :: C(4,10), B(10,10), A(4,10)
+  DO j=   1 ,  10 ,   2
+  DO i=   1 ,   4 ,   1
+  DO l=   1 ,  10 ,   1
+C(i+0,j+0)=C(i+0,j+0)+A(i+0,l+0)*B(l+0,j+0)
+C(i+0,j+1)=C(i+0,j+1)+A(i+0,l+0)*B(l+0,j+1)
+   

Re: [Patch] Increase array sizes in vect-tests to enable 256-bit vectorization

2011-12-05 Thread Ira Rosen


gcc-patches-ow...@gcc.gnu.org wrote on 05/12/2011 10:39:07 AM:

> From: Michael Zolotukhin 
> To: Richard Guenther 
> Cc: gcc-patches@gcc.gnu.org, izamya...@gmail.com
> Date: 05/12/2011 10:39 AM
> Subject: Re: [Patch] Increase array sizes in vect-tests to enable
> 256-bit vectorization
> Sent by: gcc-patches-ow...@gcc.gnu.org
>
> On 5 December 2011 10:14, Michael Zolotukhin
>  wrote:
> > Ok, will several tests with short arrays be enough for that or should
> > we keep all the original tests plus new ones with longer arrays?
>
> BTW, there is another problem with current tests with short arrays -
> scans are expecting specific number of some diagnostic messages like
> "not vectorized: unsupported unaligned store", and that number would
> be different if several vector-lengths are available - so we'll have
> fails in those tests.

There is vect_multiple_sizes for such cases.

Ira

>
> What is a right way to handle this? Currently I see two ways of doing
> it. One way is to just extend array sizes (as I did in the sent
> patch), the other way is to remove such scans from all tests
> (replacing them with some more general scans). If we are going to
> rewrite the scans, we need to be able to distinguish cases when
> several lengths are available - for now it looks unclear how to do
> this – at the first glance we could add length checkers in
> lib/support_targets.exp but this solution seems not quite good.
>
> What's your opinion on that?
>
> Best Regards, Michael.
>
> On 5 December 2011 10:14, Michael Zolotukhin
>  wrote:
> > Ok, will several tests with short arrays be enough for that or should
> > we keep all the original tests plus new ones with longer arrays?
> >
> > Michael
> >
> > On 4 December 2011 15:44, Richard Guenther
>  wrote:
> >> On Sat, Dec 3, 2011 at 5:54 PM, Michael Zolotukhin
> >>  wrote:
>  I mean, that, when 256-bit vectorization is enabled we still use
128bit
>  vectorization if the arrays are too short for 256bit
> vectorization.  You'll
>  lose this test coverage when you change the array sizes.
> >>> That's true, but do we need all these test both with short and long
> >>> arrays? We could have the tests with increased sizes and compile them
> >>> with/without use of avx, thus testing both 256- and 128- bit
> >>> vectorization. Additionally, we might want to add several tests with
> >>> short arrays to check what happens if 256-bit is available, but
arrays
> >>> is too short for it. I mean we don't need to duplicate all of the
> >>> tests to check this situation.
> >>
> >> Well, initially those tests served as a way to prove that dual-size
> >> vectorization
> >> works.  You should not remove this testing functionality.
> >>
> >> Richard.
> >>
> >>> On 3 December 2011 18:31, Richard Guenther
>  wrote:
>  On Fri, Dec 2, 2011 at 6:39 PM, Michael Zolotukhin
>   wrote:
> >>
> >> Shouldn't we add a variant for each testcase so that we still
> >> excercise both 128-bit and 256-bit vectorization paths?
> >
> > These tests are still good to test 128-bit vectorization, the
changes
> > was made just to make sure that 256-bit vectorization is possible
on
> > the tests.
> >
> > Actually, It's just first step in enabling these tests for 256 bits
-
> > for now many of them are failing if '-mavx' or '-mavx2' is
specified
> > (mostly due to different diagnostics messages produced by
vectorizer),
> > but with original (small) sizes of arrays we couldn't even check
that.
> > When they are enabled, it'll be possible to use them for testing
both
> > 128- and 256- bit vectorization.
> 
>  I mean, that, when 256-bit vectorization is enabled we still use
128bit
>  vectorization if the arrays are too short for 256bit
> vectorization.  You'll
>  lose this test coverage when you change the array sizes.
> 
>  Richard.
> 
> > Michael
> >
> >
> > 2011/12/2 Richard Guenther :
> >> 2011/12/2 Michael Zolotukhin :
> >>> Hi,
> >>>
> >>> This patch increases array sizes in tests from vect.exp suite,
thus
> >>> enabling 256-bit vectorization where it's available.
> >>>
> >>> Ok for trunk?
> >>
> >> Shouldn't we add a variant for each testcase so that we still
> >> excercise both 128-bit and 256-bit vectorization paths?
> >>
> >>> Changelog:
> >>> 2011-12-02  Michael Zolotukhin  
> >>>
> >>>        * gcc.dg/vect/slp-13.c: Increase array size, add
> initialization.
> >>>        * gcc.dg/vect/slp-24.c: Ditto.
> >>>        * gcc.dg/vect/slp-3.c: Likewise and fix scans.
> >>>        * gcc.dg/vect/slp-34.c: Ditto.
> >>>        * gcc.dg/vect/slp-4.c: Ditto.
> >>>        * gcc.dg/vect/slp-cond-2.c: Ditto.
> >>>        * gcc.dg/vect/slp-multitypes-11.c: Ditto.
> >>>        * gcc.dg/vect/vect-1.c: Ditto.
> >>>        * gcc.dg/vect/vect-10.c: Ditto.
> >>>        * gcc.dg/vect/vect-105.c: Ditto.
> >>>        * gcc.dg

Re: [PATCH] Fix vectorizer ICEs with calls with MEM_REF arguments (PR tree-optimization/51485)

2011-12-10 Thread Ira Rosen
On 9 December 2011 19:08, Jakub Jelinek  wrote:
> Hi!
>
> As mentioned in the PR, we ICE on the following testcase, because
> there are DRs in a GIMPLE_CALL stmt and when there is just one, we
> compute vectype for the call as if it were a load or store, but during
> computation of vectorization factor we only consider the return value
> of the call.  As such calls are not vectorizable anyway, the following
> patch just gives up on them.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk
> (and with the if (bb_vinfo)/if (gather) parts removed for 4.6 too)?

OK for trunk.

Thanks,
Ira


>
> 2011-12-09  Jakub Jelinek  
>
>        PR tree-optimization/51485
>        * tree-vect-data-refs.c (vect_analyze_data_refs): Give up on
>        DRs in call stmts.
>
>        * g++.dg/vect/pr51485.cc: New test.
>
> --- gcc/tree-vect-data-refs.c.jj        2011-12-02 01:52:26.325893329 +0100
> +++ gcc/tree-vect-data-refs.c   2011-12-09 13:27:29.726668859 +0100
> @@ -2896,6 +2896,26 @@ vect_analyze_data_refs (loop_vec_info lo
>           return false;
>         }
>
> +      if (is_gimple_call (stmt))
> +       {
> +         if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
> +           {
> +             fprintf (vect_dump, "not vectorized: dr in a call ");
> +             print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
> +           }
> +
> +         if (bb_vinfo)
> +           {
> +             STMT_VINFO_VECTORIZABLE (stmt_info) = false;
> +             stop_bb_analysis = true;
> +             continue;
> +           }
> +
> +         if (gather)
> +           free_data_ref (dr);
> +         return false;
> +       }
> +
>       /* Update DR field in stmt_vec_info struct.  */
>
>       /* If the dataref is in an inner-loop of the loop that is considered for
> --- gcc/testsuite/g++.dg/vect/pr51485.cc.jj     2011-12-09 13:28:45.155281405 
> +0100
> +++ gcc/testsuite/g++.dg/vect/pr51485.cc        2011-12-09 13:28:57.692205773 
> +0100
> @@ -0,0 +1,14 @@
> +/* { dg-do compile } */
> +
> +struct A { A (); unsigned int a; };
> +double bar (A a) throw () __attribute__((pure));
> +
> +void
> +foo (unsigned int x, double *y, A *z)
> +{
> +  unsigned int i;
> +  for (i = 0; i < x; i++)
> +    y[i] = bar (z[i]);
> +}
> +
> +/* { dg-final { cleanup-tree-dump "vect" } } */
>
>        Jakub


Re: [Patch] Adjust diag-scans in vect-tests to fix fails on AVX/AVX2

2011-12-12 Thread Ira Rosen


gcc-patches-ow...@gcc.gnu.org wrote on 12/12/2011 01:00:52 PM:

> I changed xfails to target-checks - for now I use common
> vect_multiple_sizes (though it'll fail when wider vectors emerge).
> Also, I changed AVX-check to the version Uros suggested. Please check
> updated patch (attached).
>
> As for vect_multiple_sizes_32B_16B and similar - isn't it too
> target-specific? I think if we want to keep everything as general as
> possible, we should have something like vect_1_vector_size_available,
> vect_2_vector_sizes_available, etc.

I think there is a difference between different vector sizes, and calling
it vect_X_vector_size_available is not sufficient. Your patch will cause
failures on ARM. It has two vector sizes, 16 and 8 bytes. E.g., vect-33.c
gets vectorized with the default vector size, and the alignment message
should be printed only once, and not twice as with your patch. So, it looks
like you need several vect_multiple_sizes_X.

Ira





Re: [Patch] Adjust diag-scans in vect-tests to fix fails on AVX/AVX2

2011-12-12 Thread Ira Rosen


Michael Zolotukhin  wrote on 12/12/2011
01:57:09 PM:

>
> By the way, how could we check if '-mprefer-avx128' was specified from
> target-supports.exp?
>

If I understand your question correctly, you can use check-flags (see
check_effective_target_arm_fp16_ok_nocache for example).

> Is there any global-variable for command line
> options or something similar?

flags

Ira



Re: [PATCH i386][google]With -mtune=core2, avoid generating the slow unaligned vector load/store (issue5488054)

2011-12-12 Thread Ira Rosen


gcc-patches-ow...@gcc.gnu.org wrote on 13/12/2011 04:05:57 AM:

> On core2, unaligned vector load/store using movdqu is a very slow
operation.
> Experiments show it is six times slower than movdqa (aligned) and this is
> irrespective of whether the resulting data happens to be aligned or not.
> For Corei7, there is no performance difference between the two and on
AMDs,
> movdqu is only about 10% slower.
>
> This patch does not vectorize loops that need to generate the slow
unaligned
> memory load/stores on core2.
>
>
>Do not vectorize loops on Core2 that need to use unaligned
>vector load/stores.
>* tree-vect-stmts.c (is_slow_vect_unaligned_load_store): New function.
>(vect_analyze_stmt): Check if the vectorizable load/store is slow.
>* target.def (TARGET_SLOW_UNALIGNED_VECTOR_MEMOP): New target hook.
>* doc/m.texi.in: Document new target hook:
>TARGET_SLOW_UNALIGNED_VECTOR_MEMOP
>* doc/m.texi: Regenerate.
>* config/i386/i386.c (ix86_slow_unaligned_vector_memop): New function.
>(TARGET_SLOW_UNALIGNED_VECTOR_MEMOP): New macro.
>


> @@ -5065,27 +5112,43 @@ vect_analyze_stmt (gimple stmt, bool
*need_to_vect
> if (!bb_vinfo
> && (STMT_VINFO_RELEVANT_P (stmt_info)
> || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
> +{
>ok = (vectorizable_type_promotion (stmt, NULL, NULL, NULL)
>  || vectorizable_type_demotion (stmt, NULL, NULL, NULL)
>  || vectorizable_conversion (stmt, NULL, NULL, NULL)
>  || vectorizable_shift (stmt, NULL, NULL, NULL)
>  || vectorizable_operation (stmt, NULL, NULL, NULL)
>  || vectorizable_assignment (stmt, NULL, NULL, NULL)
> -|| vectorizable_load (stmt, NULL, NULL, NULL, NULL)
>  || vectorizable_call (stmt, NULL, NULL)
> -|| vectorizable_store (stmt, NULL, NULL, NULL)
> -|| vectorizable_reduction (stmt, NULL, NULL, NULL)
> +   || vectorizable_reduction (stmt, NULL, NULL, NULL)
>  || vectorizable_condition (stmt, NULL, NULL, NULL, 0));
> +
> +  if (!ok)
> +   {
> +  ok = (vectorizable_load (stmt, NULL, NULL, NULL, NULL)
> +   || vectorizable_store (stmt, NULL, NULL, NULL));
> +
> + if (ok && is_slow_vect_unaligned_load_store (stmt))
> +   ok = false;

Why not call is_slow_vect_unaligned_load_store from
vectorizable_load/store?

Ira


> +   }
> +}
>  else
>{
>  if (bb_vinfo)
> -  ok = (vectorizable_type_promotion (stmt, NULL, NULL, node)
> -|| vectorizable_type_demotion (stmt, NULL, NULL, node)
> -   || vectorizable_shift (stmt, NULL, NULL, node)
> -|| vectorizable_operation (stmt, NULL, NULL, node)
> -|| vectorizable_assignment (stmt, NULL, NULL, node)
> -|| vectorizable_load (stmt, NULL, NULL, node, NULL)
> -|| vectorizable_store (stmt, NULL, NULL, node));
> + {
> +   ok = (vectorizable_type_promotion (stmt, NULL, NULL, node)
> +|| vectorizable_type_demotion (stmt, NULL, NULL, node)
> +|| vectorizable_shift (stmt, NULL, NULL, node)
> +  || vectorizable_operation (stmt, NULL, NULL, node)
> +  || vectorizable_assignment (stmt, NULL, NULL, node));
> +if (!ok)
> + {
> +ok = (vectorizable_load (stmt, NULL, NULL, node, NULL)
> +|| vectorizable_store (stmt, NULL, NULL, node));
> +   if (ok && is_slow_vect_unaligned_load_store (stmt))
> +ok = false;
> + }
> + }
>}



Re: [PATCH] Re: Vectorizer question: DIV to RSHIFT conversion

2011-12-14 Thread Ira Rosen


Jakub Jelinek  wrote on 14/12/2011 02:25:13 PM:


>
> @@ -1573,6 +1576,211 @@ vect_recog_vector_vector_shift_pattern (
>return pattern_stmt;
>  }
>
> +/* Detect a signed division by power of two constant that wouldn't be
> +   otherwise vectorized:
> +
> +   type a_t, b_t;
> +
> +   S1 a_t = b_t / N;
> +
> +  where type 'type' is a signed integral type and N is a constant
positive
> +  power of two.
> +
> +  Similarly handle signed modulo by power of two constant:
> +
> +   S4 a_t = b_t % N;
> +
> +  Input/Output:
> +
> +  * STMTS: Contains a stmt from which the pattern search begins,
> +i.e. the division stmt.  S1 is replaced by:
> +  S3  y_t = b_t < 0 ? N - 1 : 0;
> +  S2  x_t = b_t + y_t;
> +  S1' a_t = x_t >> log2 (N);
> +
> +S4 is replaced by (where *_T temporaries have unsigned type):
> +  S9  y_T = b_t < 0 ? -1U : 0U;
> +  S8  z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
> +  S7  z_t = (type) z_T;
> +  S6  w_t = b_t + z_t;
> +  S5  x_t = w_t & (N - 1);
> +  S4' a_t = x_t - z_t;
> +
> +  Output:
> +
> +  * TYPE_IN: The type of the input arguments to the pattern.
> +
> +  * TYPE_OUT: The type of the output of this pattern.
> +
> +  * Return value: A new stmt that will be used to replace the division
> +S1 or modulo S4 stmt.  */
> +
> +static gimple
> +vect_recog_sdivmod_pow2_pattern (VEC (gimple, heap) **stmts,
> + tree *type_in, tree *type_out)
> +{
> +  gimple last_stmt = VEC_pop (gimple, *stmts);
> +  gimple_stmt_iterator gsi;
> +  tree oprnd0, oprnd1, vectype, itype, cond;
> +  gimple pattern_stmt, def_stmt;
> +  enum tree_code rhs_code;
> +  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
> +  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
> +  optab optab;
> +
> +  if (!is_gimple_assign (last_stmt))
> +return NULL;
> +
> +  rhs_code = gimple_assign_rhs_code (last_stmt);
> +  switch (rhs_code)
> +{
> +case TRUNC_DIV_EXPR:
> +case TRUNC_MOD_EXPR:
> +  break;
> +default:
> +  return NULL;
> +}
> +
> +  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
> +return NULL;
> +
> +  oprnd0 = gimple_assign_rhs1 (last_stmt);
> +  oprnd1 = gimple_assign_rhs2 (last_stmt);
> +  itype = TREE_TYPE (oprnd0);
> +  if (TREE_CODE (oprnd0) != SSA_NAME
> +  || TREE_CODE (oprnd1) != INTEGER_CST
> +  || TREE_CODE (itype) != INTEGER_TYPE
> +  || TYPE_UNSIGNED (itype)
> +  || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE
(itype))
> +  || !integer_pow2p (oprnd1)
> +  || tree_int_cst_sgn (oprnd1) != 1)
> +return NULL;
> +
> +  vectype = get_vectype_for_scalar_type (itype);
> +  if (vectype == NULL_TREE)
> +return NULL;
> +
> +  /* If the target can handle vectorized division or modulo natively,
> + don't attempt to optimize this.  */
> +  optab = optab_for_tree_code (rhs_code, vectype, optab_default);
> +  if (optab != NULL)
> +{
> +  enum machine_mode vec_mode = TYPE_MODE (vectype);
> +  int icode = (int) optab_handler (optab, vec_mode);
> +  if (icode != CODE_FOR_nothing
> + || GET_MODE_SIZE (vec_mode) == UNITS_PER_WORD)
> +   return NULL;
> +}
> +
> +  /* Pattern detected.  */
> +  if (vect_print_dump_info (REPORT_DETAILS))
> +fprintf (vect_dump, "vect_recog_sdivmod_pow2_pattern: detected: ");
> +
> +  cond = build2 (LT_EXPR, boolean_type_node, oprnd0, build_int_cst
> (itype, 0));
> +  gsi = gsi_for_stmt (last_stmt);
> +  if (rhs_code == TRUNC_DIV_EXPR)
> +{
> +  tree var = vect_recog_temp_ssa_var (itype, NULL);
> +  def_stmt
> +   = gimple_build_assign_with_ops3 (COND_EXPR, var, cond,
> +fold_build2 (MINUS_EXPR, itype,
> +oprnd1,
> +build_int_cst (itype,
> + 1)),
> +build_int_cst (itype, 0));
> +  gsi_insert_before (&gsi, def_stmt, GSI_SAME_STMT);

Hmm, you are inserting pattern stmts. This was causing some mess in the
past as explained here
http://gcc.gnu.org/ml/gcc-patches/2011-06/msg00801.html. Maybe you can use
STMT_VINFO_PATTERN_DEF_STMT to keep a chain of def_stmts?

Ira


> +  set_vinfo_for_stmt (def_stmt, new_stmt_vec_info (def_stmt,
loop_vinfo,
> + NULL));
> +  var = vect_recog_temp_ssa_var (itype, NULL);
> +  def_stmt
> +   = gimple_build_assign_with_ops (PLUS_EXPR, var, oprnd0,
> +   gimple_assign_lhs (def_stmt));
> +  STMT_VINFO_PATTERN_DEF_STMT (stmt_vinfo) = def_stmt;
> +
> +  pattern_stmt
> +   = gimple_build_assign_with_ops (RSHIFT_EXPR,
> +   vect_recog_temp_ssa_var (itype, NULL),
> +   var,
> +   build_int_cst (itype,
> + tree_log2 (oprnd1)));
> +}
> +  else
> +{
> +  tree signmask;
> +  tree utype = build_nonstandard_integer_type (TYPE_PRECISION
> (itype), 1);
> +  tree shift = build_int_cst (utype, GET_MODE_BITSIZE (TYPE_MODE
(itype))
> +- tree_log2 (oprnd1));
> + 

Re: [PATCH] Re: Vectorizer question: DIV to RSHIFT conversion

2011-12-15 Thread Ira Rosen


Jakub Jelinek  wrote on 15/12/2011 09:02:57 AM:

> On Thu, Dec 15, 2011 at 08:32:26AM +0200, Ira Rosen wrote:
> > > +  cond = build2 (LT_EXPR, boolean_type_node, oprnd0, build_int_cst
> > > (itype, 0));
> > > +  gsi = gsi_for_stmt (last_stmt);
> > > +  if (rhs_code == TRUNC_DIV_EXPR)
> > > +{
> > > +  tree var = vect_recog_temp_ssa_var (itype, NULL);
> > > +  def_stmt
> > > +   = gimple_build_assign_with_ops3 (COND_EXPR, var, cond,
> > > +fold_build2 (MINUS_EXPR, itype,
> > > +oprnd1,
> > > +build_int_cst (itype,
> > > + 1)),
> > > +build_int_cst (itype, 0));
> > > +  gsi_insert_before (&gsi, def_stmt, GSI_SAME_STMT);
> >
> > Hmm, you are inserting pattern stmts. This was causing some mess in the
> > past as explained here
> > http://gcc.gnu.org/ml/gcc-patches/2011-06/msg00801.html. Maybe you can
use
> > STMT_VINFO_PATTERN_DEF_STMT to keep a chain of def_stmts?
>
> Yes, I know, but STMT_VINFO_PATTERN_DEF_STMT contains a single gimple
stmt,
> while I need here several def stmts.

> +  S3  y_t = b_t < 0 ? N - 1 : 0;
> +  S2  x_t = b_t + y_t;
> +  S1' a_t = x_t >> log2 (N);

I was talking about putting S3 in STMT_VINFO_PATTERN_DEF_STMT of S2.

> I think it can't cause problems in
> this case, the stmts will be easily DCEd.

But it's really ugly to insert part of pattern sequence, don't you think?

Ira

>  We could turn
> STMT_VINFO_PATTERN_DEF_STMT into a gimple_seq perhaps, will try that and
see
> how invasive that would be.
>
>Jakub
>



Re: [Patch] Adjust diag-scans in vect-tests to fix fails on AVX/AVX2

2011-12-15 Thread Ira Rosen


Uros Bizjak  wrote on 15/12/2011 09:56:12 AM:

> On Thu, Dec 15, 2011 at 5:16 AM, Michael Zolotukhin
>  wrote:
> > Thanks!
> > Fixed patch is attached.
> >
> > Any other comments?
> >
> > Changelog:
> > 2011-12-14  Michael Zolotukhin  
> >
> >        * gcc.dg/vect/no-section-anchors-vect-31.c: Adjust array
> size and test
> >        diag-scans to fix fail on AVX.
> >        * gcc.dg/vect/no-section-anchors-vect-36.c: Ditto.
> >        * gcc.dg/vect/no-section-anchors-vect-64.c: Ditto.
> >        * gcc.dg/vect/no-section-anchors-vect-66.c: Ditto.
> >        * gcc.dg/vect/no-section-anchors-vect-68.c: Ditto.
> >        * gcc.dg/vect/no-section-anchors-vect-69.c: Ditto.
> >        * gcc.dg/vect/no-vfa-vect-dv-2.c: Adjust dg-scans.
> >        * gcc.dg/vect/pr45752.c: Ditto.
> >        * gcc.dg/vect/slp-perm-4.c: Ditto.
> >        * gcc.dg/vect/slp-perm-9.c: Ditto.
> >        * gcc.dg/vect/vect-33.c: Ditto.
> >        * gcc.dg/vect/vect-35.c: Ditto.
> >        * gcc.dg/vect/vect-6-big-array.c: Ditto.
> >        * gcc.dg/vect/vect-6.c: Ditto.
> >        * gcc.dg/vect/vect-91.c: Ditto.
> >        * gcc.dg/vect/vect-all-big-array.c: Ditto.
> >        * gcc.dg/vect/vect-all.c: Ditto.
> >        * gcc.dg/vect/vect-multitypes-1.c: Ditto.
> >        * gcc.dg/vect/vect-outer-4c.c: Ditto.
> >        * gcc.dg/vect/vect-outer-5.c: Ditto.
> >        * gcc.dg/vect/vect-over-widen-1.c: Ditto.
> >        * gcc.dg/vect/vect-over-widen-3.c: Ditto.
> >        * gcc.dg/vect/vect-over-widen-4.c: Ditto.
> >        * gcc.dg/vect/vect-peel-1.c: Ditto.
> >        * gcc.dg/vect/vect-peel-2.c: Ditto.
> >        * gcc.dg/vect/vect-peel-3.c: Ditto.
> >        * gcc.dg/vect/vect-reduc-pattern-1b.c: Ditto.
> >        * gcc.dg/vect/vect-reduc-pattern-1c.c: Ditto.
> >        * gcc.dg/vect/vect-reduc-pattern-2b.c: Ditto.
> >        * gcc.dg/vect/wrapv-vect-reduc-pattern-2c.c: Ditto.
> >        * lib/target-supports.exp
> (check_effective_target_vect_any_perm): New function.
> >        (check_avx_available): Ditto.
> >        (check_effective_target_vect_sizes_32B_16B): Ditto.
> >        (check_prefer_avx128): Ditto.
> >        (check_effective_target_vect_aligned_arrays): Add handling of
AVX.
> >        (check_effective_target_vect_multiple_sizes): Ditto.
>
> Looks OK to me, the patch needs final approval from vectorizer
> maintainer (I propose that we go ahead with the patch, fixing any
> remaining problems with an incremental patch.)

Fine with me.

Ira

>
> Thanks,
> Uros.
>



Re: [PATCH] Re: Vectorizer question: DIV to RSHIFT conversion (take 2)

2011-12-15 Thread Ira Rosen


Jakub Jelinek  wrote on 15/12/2011 12:54:29 PM:

> Perhaps it would be even cleaner to get rid of the pattern stmt and def
stmt
> seq distinction and just have pattern as whole be represented as
gimple_seq,
> but perhaps that cleanup can be deferred for later.

Sounds good.

> This patch also fixes
> a problem where vect_determine_vectorization_factor would iterate the
same
> stmt twice - for some reason both the original stmt and pattern stmt (and
> def stmt) are marked as relevant,

Do you have a testcase where the original stmt is marked as relevant? It
shouldn't be that way.

> and we iterate on the same original stmt
> not just 3 times, but 4 times - first iteration on the original stmt,
> setting analyze_pattern_stmt, second iteration starts with the pattern
stmt
> but clears analyze_pattern_stmt, then sees it has a def stmt and thus
> runs on the def stmt, third iteration with pattern_def set on entry
> again on the original stmt, setting analyze_pattern_stmt again and last
one
> on the pattern stmt (because pattern_def was already set on entry and it
> clears it).

In case the original stmt is not marked as relevant we don't really
analyze/transform the same stmt twice (and certainly not 4 times).

> Sounds like those two routines
> (vect_determine_vectorization_factor and vect_transform_loop) would be
> bettern rewritten with a helper that would handle a single stmt/stmt_info
> pair and we would just call that helper on all the original/pattern/def
> stmts we want to handle.

I agree.

>
> 2011-12-15  Jakub Jelinek  
>
>* tree-vectorizer.h (struct _stmt_vec_info): Remove pattern_def_stmt
>field, add pattern_def_seq.
>(STMT_VINFO_PATTERN_DEF_STMT): Remove.
>(STMT_VINFO_PATTERN_DEF_SEQ): Define.
>(NUM_PATTERNS): Bump to 10.
>* tree-vect-loop.c (vect_determine_vectorization_factor,
>vect_transform_loop): Adjust for pattern def changing from a single
>gimple stmt to gimple_seq.
>* tree-vect-stmts.c (vect_analyze_stmt, new_stmt_vec_info,
>free_stmt_vec_info): Likewise.
>* tree-vect-patterns.c (vect_recog_over_widening_pattern,
>vect_recog_vector_vector_shift_pattern,
>vect_recog_mixed_size_cond_pattern, adjust_bool_pattern_cast,
>adjust_bool_pattern, vect_mark_pattern_stmts): Likewise.
>(vect_recog_sdivmod_pow2_pattern): New function.
>(vect_vect_recog_func_ptrs): Add it.

The patch looks ok to me, but I wonder if it's appropriate for the current
stage.

Thanks,
Ira





Re: [PATCH] Re: Vectorizer question: DIV to RSHIFT conversion (take 2)

2011-12-15 Thread Ira Rosen


Jakub Jelinek  wrote on 15/12/2011 03:51:25 PM:

> On Thu, Dec 15, 2011 at 03:35:34PM +0200, Ira Rosen wrote:
> > > This patch also fixes
> > > a problem where vect_determine_vectorization_factor would iterate the
> > same
> > > stmt twice - for some reason both the original stmt and pattern stmt
(and
> > > def stmt) are marked as relevant,
> >
> > Do you have a testcase where the original stmt is marked as relevant?
It
> > shouldn't be that way.
>
> Yes, gcc.dg/vect/pr51015.c.  It isn't vectorized on x86_64-linux in the
> end, because of the other shift (long long), even with -mavx2 or -mxop.

Thanks, I'll take a look.

Ira

>
> > The patch looks ok to me, but I wonder if it's appropriate for the
current
> > stage.
>
> Richard said on IRC that he is ok with that.
>
>Jakub
>



Re: [PATCH] Re: Vectorizer question: DIV to RSHIFT conversion (take 2)

2011-12-18 Thread Ira Rosen


Jakub Jelinek  wrote on 15/12/2011 03:51:25 PM:

> On Thu, Dec 15, 2011 at 03:35:34PM +0200, Ira Rosen wrote:
> > > This patch also fixes
> > > a problem where vect_determine_vectorization_factor would iterate the
> > same
> > > stmt twice - for some reason both the original stmt and pattern stmt
(and
> > > def stmt) are marked as relevant,
> >
> > Do you have a testcase where the original stmt is marked as relevant?
It
> > shouldn't be that way.
>
> Yes, gcc.dg/vect/pr51015.c.  It isn't vectorized on x86_64-linux in the
> end, because of the other shift (long long), even with -mavx2 or -mxop.
>

We need to mark original stmts as relevant in case they have uses both
inside and outside the pattern (
http://gcc.gnu.org/ml/gcc-patches/2011-06/msg02183.html). This case is a
bit different from the original intent:

S1:  D.2006_9 = D.2005_4 << D.2004_3;
S1':--> pattern stmt
S2:  max_sizes_10 = max_sizes_5 + D.2006_9;
S3:  D.2007_16 = D.2006_9 << D.2004_3;
S3':--> pattern stmt

D.2006_9 is used in S2, outside any pattern, and for this use we need LHS
of S1'. S3 is a pattern stmt, but this is actually another pattern. But we
still want the original D.2006_9 for S3', so we need to vectorize S1
(unless we change the pattern detector).

And, you are right, before your patch we analyzed the original stmt twice
in such cases.

Ira



[patch] Fix PR tree-optimization/51684

2011-12-28 Thread Ira Rosen

Hi,

This patch fixes an attempt to access gsi of pattern statement.

Bootstrapped and tested on ia64-unknown-linux-gnu by Uros and on
powerpc64-suse-linux by me.

Committed.

Ira

ChangeLog:

PR tree-optimization/51684
* tree-vect-slp.c (vect_schedule_slp_instance): Get gsi of original
statement in case of a pattern.
(vect_schedule_slp): Likewise.

Index: gcc/tree-vect-slp.c
===
--- gcc/tree-vect-slp.c (revision 182703)
+++ gcc/tree-vect-slp.c (working copy)
@@ -2885,6 +2885,8 @@ vect_schedule_slp_instance (slp_tree node, slp_ins
   && REFERENCE_CLASS_P (gimple_get_lhs (stmt)))
 {
   gimple last_store = vect_find_last_store_in_slp_instance (instance);
+  if (is_pattern_stmt_p (vinfo_for_stmt (last_store)))
+   last_store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (last_store));
   si = gsi_for_stmt (last_store);
 }

@@ -2989,6 +2991,8 @@ vect_schedule_slp (loop_vec_info loop_vinfo, bb_ve
   if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
 break;

+ if (is_pattern_stmt_p (vinfo_for_stmt (store)))
+   store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (store));
   /* Free the attached stmt_vec_info and remove the stmt.  */
   gsi = gsi_for_stmt (store);
   gsi_remove (&gsi, true);



Re: [PATCH] PR testsuite/51097 fix: a lot of "FAIL: gcc.dg/vect" on i686 avx build 181167 to 181177

2011-12-29 Thread Ira Rosen


gcc-patches-ow...@gcc.gnu.org wrote on 28/12/2011 11:05:19 PM:

> Hi,

Hi Igor,

>
> Here is another patch about failures in gcc.dg/vect tests. These
> changes fix fails that could be seen on avx-built compilers. It also
> introduces no FAILs/XFAILs/XPASSes/ERRORs on regular i686, x86_64,
> avx2_32, avx2_64.
> Is it ok for the trunk?


> diff --git a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-sum.c
b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-sum.c
> index 2898918..1d190fc 100644
> --- a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-sum.c
> +++ b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-sum.c
> @@ -43,5 +43,6 @@ int main (void)
>
>
>  /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1
"vect" { target vect_widen_mult_hi_to_si } } } */
> -/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern:
detected" 1 "vect" } } */
> +/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern:
detected" 1 "vect" { target {! vect_float_no_int } } } } */
> +/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern:
detected" 2 "vect" { target vect_float_no_int } } } */
>  /* { dg-final { cleanup-tree-dump "vect" } } */

Could you please explain what are you trying to do? How is
> +# Return 1 if the target supports hardware vectors of float and doesn't
support
> +# vectors of int, 0 otherwise.
related to the number of times that pattern is detected?

Thanks,
Ira


>
> Thanks,
> Igor
>
> 2011-12-28  Igor Zamyatin  
>
>        PR testsuite/51097
>        * lib/target-supports.exp
(check_effective_target_vect_float_no_int):
>        New function.
>        (check_avx2_available): Ditto.
>        * gcc.dg/vect/no-scevccp-outer-7.c: Adjust dg-scans for AVX-built
>        compiler.
>        * gcc.dg/vect/no-scevccp-vect-iv-3.c: Likewise.
>        * gcc.dg/vect/no-vfa-vect-depend-1.c: Likewise.
>        * gcc.dg/vect/no-vfa-vect-dv-2.c: Likewise.
>        * gcc.dg/vect/slp-perm-9.c: Likewise.
>        * gcc.dg/vect/slp-reduc-6.c: Likewise.
>        * gcc.dg/vect/slp-widen-mult-half.c: Likewise.
>        * gcc.dg/vect/vect-109.c: Likewise.
>        * gcc.dg/vect/vect-119.c: Likewise.
>        * gcc.dg/vect/vect-35-big-array.c: Likewise.
>        * gcc.dg/vect/vect-91.c: Likewise.
>        * gcc.dg/vect/vect-multitypes-4.c: Likewise.
>        * gcc.dg/vect/vect-multitypes-6.c: Likewise.
>        * gcc.dg/vect/vect-outer-4c-big-array.c: Likewise.
>        * gcc.dg/vect/vect-over-widen-1.c: Likewise.
>        * gcc.dg/vect/vect-over-widen-4.c: Likewise.
>        * gcc.dg/vect/vect-peel-1.c: Likewise.
>        * gcc.dg/vect/vect-peel-3.c: Likewise.
>        * gcc.dg/vect/vect-peel-4.c: Likewise.
>        * gcc.dg/vect/vect-reduc-dot-s16a.c: Likewise.
>        * gcc.dg/vect/vect-reduc-dot-s8a.c: Likewise.
>        * gcc.dg/vect/vect-reduc-dot-u8a.c: Likewise.
>        * gcc.dg/vect/vect-reduc-dot-u8b.c: Likewise.
>        * gcc.dg/vect/vect-reduc-pattern-1a.c: Likewise.
>        * gcc.dg/vect/vect-reduc-pattern-1b-big-array.c: Likewise.
>        * gcc.dg/vect/vect-reduc-pattern-1c-big-array.c: Likewise.
>        * gcc.dg/vect/vect-reduc-pattern-2a.c: Likewise.
>        * gcc.dg/vect/vect-reduc-pattern-2b-big-array.c: Likewise.
>        * gcc.dg/vect/vect-widen-mult-const-s16.c: Likewise.
>        * gcc.dg/vect/vect-widen-mult-const-u16.c: Likewise.
>        * gcc.dg/vect/vect-widen-mult-half-u8.c: Likewise.
>        * gcc.dg/vect/vect-widen-mult-half.c: Likewise.
>        * gcc.dg/vect/vect-widen-mult-sum.c: Likewise.
>        * gcc.dg/vect/vect-widen-mult-u16.c: Likewise.
>        * gcc.dg/vect/wrapv-vect-reduc-dot-s8b.c: Likewise.
> [attachment "51097.patch" deleted by Ira Rosen/Haifa/IBM]



Re: [PATCH] PR testsuite/51097 fix: a lot of "FAIL: gcc.dg/vect" on i686 avx build 181167 to 181177

2011-12-29 Thread Ira Rosen


Igor Zamyatin  wrote on 29/12/2011 02:04:45 PM:

> When the compiler is configured with, say, corei7-avx, it outputs twice
> as many diagnostics on integer tests since AVX deals mostly with floats.
> I.e. the compiler tries to vectorize on the AVX vector size, then fails
> and then vectorizes on a smaller vector size. This double work leads to
> double diagnostic output.

OK, so why not use vect_sizes_32B_16B?

Ira




Re: [PATCH] PR testsuite/51097 fix: a lot of "FAIL: gcc.dg/vect" on i686 avx build 181167 to 181177

2011-12-29 Thread Ira Rosen


Igor Zamyatin  wrote on 29/12/2011 02:29:46 PM:

>
> Because it includes AVX and AVX2 which deals with int and for AVX2
> there are no problems with doubled diagnostics.

And you can't just update vect_int because AVX does support it but with
128-bit vectors, right?
So, your vect_float_no_int looks incorrect as well.
You need to describe the case when two vector sizes are analyzed, but the
first one always fails. Maybe vect_sizes_32B_16B_noint? Probably ugly, but
correct at least.

I also suggest simplifying the checks and not checking the number of times
a pattern was detected, like this:

Index: vect-widen-mult-half.c
===
--- vect-widen-mult-half.c  (revision 182703)
+++ vect-widen-mult-half.c  (working copy)
@@ -43,7 +43,7 @@ int main (void)
 }

 /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1
"vect" { target vect_widen_mult_hi_to_si } } } */
-/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern:
detected" 1 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */
+/* { dg-final { scan-tree-dump "vect_recog_widen_mult_pattern: detected"
"vect" { target vect_widen_mult_hi_to_si_pattern } } } */
 /* { dg-final { scan-tree-dump-times "pattern recognized" 1
"vect" { target vect_widen_mult_hi_to_si_pattern } } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */


And split the tests with several loops into several files, like, for
example, vect-widen-mult-const-s16.c.

The simplification is ok for slp-reduc-6.c, vect-119.c, vect-35-big-array.c
as well.

For the rest of the tests, I don't understand why there are alignment
messages printed twice for two vector sizes. Why doesn't the vectorizer
fail during vect_determine_vectorization_factor?

Ira

>
> I understand that all this looks quite bulky but it's hard to create
> something which looks better without losing generality
>
>
> On Thu, Dec 29, 2011 at 4:15 PM, Ira Rosen  wrote:
> >
> >
> > Igor Zamyatin  wrote on 29/12/2011 02:04:45 PM:
> >
> >> When compiler configured with, say corei7-avx, it outputs twice more
> >> diagnostics on integer tests since AVX deals mostly with floats. I.e.
> >> compiler tries to vectorize on AVX vector size, than fails and then
> >> vectorizes on smaller vector size. This double work leads to double
> >> diagnostic output.
> >
> > OK, so you why not use vect_sizes_32B_16B?
> >
> > Ira
> >
> >
>



[patch] Fix PR tree-optimization/51704

2012-01-01 Thread Ira Rosen

Hi,

This patch adds a check that a statement is inside the loop/basic block
that is being analyzed before accessing its vect info.

Bootstrapped and tested on powerpc64-suse-linux.
Committed.

Ira

ChangeLog:

PR tree-optimization/51704
* tree-vect-slp.c (vect_detect_hybrid_slp_stmts): Check that
a use is inside the basic block or loop before accessing its
vect info.

testsuite/ChangeLog:

PR tree-optimization/51704
* gfortran.dg/vect/no-fre-no-copy-prop-O3-pr51704.f90: New.
* gfortran.dg/vect/vect.exp: Run no-fre-no-copy-prop-O3-* with
corresponding flags.

(See attached file: patch.txt)Index: ChangeLog
===
--- ChangeLog   (revision 182767)
+++ ChangeLog   (working copy)
@@ -1,3 +1,10 @@
+2012-01-01  Ira Rosen  
+
+   PR tree-optimization/51704
+   * tree-vect-slp.c (vect_detect_hybrid_slp_stmts): Check that
+   a use is inside the basic block or loop before accessing its
+   vect info.
+
 2012-01-01  Jan Hubicka  
 
PR rtl-optimization/51069
Index: testsuite/gfortran.dg/vect/no-fre-no-copy-prop-O3-pr51704.f90
===
--- testsuite/gfortran.dg/vect/no-fre-no-copy-prop-O3-pr51704.f90   
(revision 0)
+++ testsuite/gfortran.dg/vect/no-fre-no-copy-prop-O3-pr51704.f90   
(revision 0)
@@ -0,0 +1,57 @@
+! { dg-do compile }
+
+  integer, parameter :: q = 2
+  integer, parameter :: nx=3, ny=2*q, nz=5
+  integer, parameter, dimension(nx,ny,nz) :: p  = &
+& reshape ((/ (i**2, i=1,size(p)) /), shape(p))
+  integer, parameter, dimension(   ny,nz) :: px = &
+& reshape ((/ (( &
+&   +  nx*(nx-1)*(2*nx-1)/6, &
+&   j=0,ny-1), k=0,nz-1) /), shape(px))
+  integer, parameter, dimension(nx,   nz) :: py = &
+& reshape ((/ (( &
+&   +(nx   )**2*ny*(ny-1)*(2*ny-1)/6, &
+&   i=0,nx-1), k=0,nz-1) /), shape(py))
+  integer, parameter, dimension(nx,ny   ) :: pz = &
+& reshape ((/ (( &
+&   +(nx*ny)**2*nz*(nz-1)*(2*nz-1)/6, &
+&   i=0,nx-1), j=0,ny-1) /), shape(pz))
+  integer, dimension(nx,ny,nz) :: a
+  integer, dimension(nx,ny   ) :: az
+  if (sum(sum(sum(a,1),2),1) /= sum(a)) call abort
+  if (sum(sum(sum(a,3),1),1) /= sum(a)) call abort
+  if (any(1+sum(eid(a),1)+ax+sum( &
+neid3(a), &
+1)+1  /= 3*ax+2))call abort
+  if (any(1+eid(sum(a,2))+ay+ &
+neid2( &
+sum(a,2) &
+)+1  /= 3*ay+2))call abort
+  if (any(sum(eid(sum(a,3))+az+2* &
+neid2(az) &
+,1)+1 /= 4*sum(az,1)+1)) call abort
+contains
+  elemental function eid (x)
+integer, intent(in) :: x
+  end function eid
+  function neid2 (x)
+integer, intent(in) :: x(:,:)
+integer :: neid2(size(x,1),size(x,2))
+neid2 = x
+  end function neid2
+  function neid3 (x)
+integer, intent(in) :: x(:,:,:)
+integer :: neid3(size(x,1),size(x,2),size(x,3))
+  end function neid3
+  elemental subroutine set (o, i)
+integer, intent(in)  :: i
+integer, intent(out) :: o
+  end subroutine set
+  elemental subroutine tes (i, o)
+integer, intent(in)  :: i
+integer, intent(out) :: o
+  end subroutine tes
+end
+
+! { dg-final { cleanup-tree-dump "vect" } }
+
Index: testsuite/gfortran.dg/vect/vect.exp
===
--- testsuite/gfortran.dg/vect/vect.exp (revision 182767)
+++ testsuite/gfortran.dg/vect/vect.exp (working copy)
@@ -90,6 +90,12 @@ lappend DEFAULT_VECTCFLAGS "-Ofast"
 dg-runtest [lsort [glob -nocomplain 
$srcdir/$subdir/Ofast-*.\[fF\]{,90,95,03,08} ]]  \
 "" $DEFAULT_VECTCFLAGS
 
+# With -fno-tree-copy-prop -fno-tree-fre -O3
+set DEFAULT_VECTCFLAGS $SAVED_DEFAULT_VECTCFLAGS
+lappend DEFAULT_VECTCFLAGS "-fno-tree-copy-prop" "-fno-tree-fre" "-O3"
+dg-runtest [lsort [glob -nocomplain 
$srcdir/$subdir/no-fre-no-copy-prop-O3-*.\[fF\]{,90,95,03,08} ]]  \
+"" $DEFAULT_VECTCFLAGS
+
 # Clean up.
 set dg-do-what-default ${save-dg-do-what-default}
 
Index: testsuite/ChangeLog
=======
--- testsuite/ChangeLog (revision 182767)
+++ testsuite/ChangeLog (working copy)
@@ -1,3 +1,10 @@
+2012-01-01  Ira Rosen  
+
+   PR tree-optimization/51704
+   * gfortran.dg/vect/no-fre-no-copy-prop-O3-pr51704.f90: New.
+   * gfortran.dg/vect/vect.exp: Run no-fre-no-copy-prop-O3-* with
+   corresponding flags.
+
 2012-01-01  Jan Hubicka  
 
PR rtl-optimization/51069
Index: tree-vect-slp.c
===
--- tree-vect-slp.c (revision 182767)
+++ tree-vect-slp.c (working copy)
@@

[patch] Fix PR tree-optimization/51269

2012-01-03 Thread Ira Rosen

Hi,

As described in PR 51269, the vectorizer adjusts number of prologue loop
iterations according to cost model, but never uses the result. This happens
because the result is not returned from the function that computes it, and
is, therefore, ignored.

Bootstrapped and tested on powerpc64-suse-linux.
I am going to commit it shortly.

Ira

ChangeLog:

PR tree-optimization/51269
* tree-vect-loop-manip.c (set_prologue_iterations): Make
first_niters
a pointer.
(slpeel_tree_peel_loop_to_edge): Likewise.
(vect_do_peeling_for_loop_bound): Update call to
slpeel_tree_peel_loop_to_edge.
(vect_gen_niters_for_prolog_loop): Don't compute wide_prolog_niters
here.  Remove it from the parameters list.
(vect_do_peeling_for_alignment): Update calls and compute
wide_prolog_niters.

Index: tree-vect-loop-manip.c
===
--- tree-vect-loop-manip.c  (revision 182833)
+++ tree-vect-loop-manip.c  (working copy)
@@ -1037,7 +1037,7 @@ slpeel_verify_cfg_after_peeling (struct loop *firs

 static void
 set_prologue_iterations (basic_block bb_before_first_loop,
-tree first_niters,
+tree *first_niters,
 struct loop *loop,
 unsigned int th)
 {
@@ -1100,9 +1100,9 @@ set_prologue_iterations (basic_block bb_before_fir
   newphi = create_phi_node (var, bb_before_first_loop);
   add_phi_arg (newphi, prologue_after_cost_adjust_name, e_fallthru,
   UNKNOWN_LOCATION);
-  add_phi_arg (newphi, first_niters, e_false, UNKNOWN_LOCATION);
+  add_phi_arg (newphi, *first_niters, e_false, UNKNOWN_LOCATION);

-  first_niters = PHI_RESULT (newphi);
+  *first_niters = PHI_RESULT (newphi);
 }

 /* Function slpeel_tree_peel_loop_to_edge.
@@ -1158,7 +1158,7 @@ set_prologue_iterations (basic_block bb_before_fir

 static struct loop*
 slpeel_tree_peel_loop_to_edge (struct loop *loop,
-  edge e, tree first_niters,
+  edge e, tree *first_niters,
   tree niters, bool update_first_loop_count,
   unsigned int th, bool check_profitability,
   tree cond_expr, gimple_seq
cond_expr_stmt_list)
@@ -1328,8 +1328,8 @@ slpeel_tree_peel_loop_to_edge (struct loop *loop,
   if (!update_first_loop_count)
 {
   pre_condition =
-   fold_build2 (LE_EXPR, boolean_type_node, first_niters,
-build_int_cst (TREE_TYPE (first_niters), 0));
+   fold_build2 (LE_EXPR, boolean_type_node, *first_niters,
+build_int_cst (TREE_TYPE (*first_niters), 0));
   if (check_profitability)
{
  tree scalar_loop_iters
@@ -1360,8 +1360,8 @@ slpeel_tree_peel_loop_to_edge (struct loop *loop,
 loop, th);

   pre_condition =
-   fold_build2 (LE_EXPR, boolean_type_node, first_niters,
-build_int_cst (TREE_TYPE (first_niters), 0));
+   fold_build2 (LE_EXPR, boolean_type_node, *first_niters,
+build_int_cst (TREE_TYPE (*first_niters), 0));
 }

   skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
@@ -1402,7 +1402,7 @@ slpeel_tree_peel_loop_to_edge (struct loop *loop,
   bb_after_second_loop = split_edge (single_exit (second_loop));

   pre_condition =
-   fold_build2 (EQ_EXPR, boolean_type_node, first_niters, niters);
+   fold_build2 (EQ_EXPR, boolean_type_node, *first_niters, niters);
   skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition, NULL,
   bb_after_second_loop,
bb_before_first_loop);
   slpeel_update_phi_nodes_for_guard2 (skip_e, second_loop,
@@ -1411,7 +1411,7 @@ slpeel_tree_peel_loop_to_edge (struct loop *loop,
   /* 4. Make first-loop iterate FIRST_NITERS times, if requested.
*/
   if (update_first_loop_count)
-slpeel_make_loop_iterate_ntimes (first_loop, first_niters);
+slpeel_make_loop_iterate_ntimes (first_loop, *first_niters);

   BITMAP_FREE (definitions);
   delete_update_ssa ();
@@ -1925,7 +1925,7 @@ vect_do_peeling_for_loop_bound (loop_vec_info loop
 }

   new_loop = slpeel_tree_peel_loop_to_edge (loop, single_exit (loop),
-ratio_mult_vf_name, ni_name,
false,
+&ratio_mult_vf_name, ni_name,
false,
 th, check_profitability,
cond_expr,
cond_expr_stmt_list);
   gcc_assert (new_loop);
@@ -1988,8 +1988,7 @@ vect_do_peeling_for_loop_bound (loop_vec_info loop
use TYPE_VECTOR_SUBPARTS.  */

 static tree
-vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree
loop_niters,
-tree *wide_prolog_niters)
+vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinf

[patch] Fix PR tree-optimization/51799

2012-01-12 Thread Ira Rosen

Hi,

In over-widening pattern we expect the last statement to be a type
demotion, but don't check this properly. The patch fixes the check, and
also updates vect-widen-shift-u8.c to expect additional widening shift
pattern instead of over-widening pattern.

Bootstrapped and tested on powerpc64-suse-linux and checked with cross for
arm-linux-gnueabi.

Committed.

Ira

ChangeLog:

PR tree-optimization/51799
* tree-vect-patterns.c (vect_recog_over_widening_pattern): Check
that the last operation is a type demotion.

testsuite/ChangeLog:

PR tree-optimization/51799
* gcc.dg/vect/pr51799.c: New test.
* gcc.dg/vect/vect-widen-shift-u8.c: Expect two widening shift
patterns.

Index: testsuite/gcc.dg/vect/pr51799.c
===
--- testsuite/gcc.dg/vect/pr51799.c (revision 0)
+++ testsuite/gcc.dg/vect/pr51799.c (revision 0)
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+
+typedef signed char int8_t;
+typedef unsigned char uint8_t;
+typedef signed short int16_t;
+typedef unsigned long uint32_t;
+void
+f0a (uint32_t * __restrict__ result, int8_t * __restrict__ arg1,
+ uint32_t * __restrict__ arg4, int8_t temp_6)
+{
+  int idx;
+  for (idx = 0; idx < 416; idx += 1)
+{
+  result[idx] = (uint8_t)(((arg1[idx] << 7) + arg4[idx]) * temp_6);
+}
+}
+
+/* { dg-final { cleanup-tree-dump "vect" } } */
Index: testsuite/gcc.dg/vect/vect-widen-shift-u8.c
===
--- testsuite/gcc.dg/vect/vect-widen-shift-u8.c (revision 183125)
+++ testsuite/gcc.dg/vect/vect-widen-shift-u8.c (working copy)
@@ -59,7 +59,6 @@ int main (void)
   return 0;
 }

-/* { dg-final { scan-tree-dump-times "vect_recog_widen_shift_pattern:
detected" 1 "vect" { target vect_widen_shift } } } */
+/* { dg-final { scan-tree-dump-times "vect_recog_widen_shift_pattern:
detected" 2 "vect" { target vect_widen_shift } } } */
 /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
-
Index: tree-vect-patterns.c
===
--- tree-vect-patterns.c(revision 183125)
+++ tree-vect-patterns.c(working copy)
@@ -1186,13 +1186,15 @@ vect_recog_over_widening_pattern (VEC (gimple, hea
 {
   use_lhs = gimple_assign_lhs (use_stmt);
   use_type = TREE_TYPE (use_lhs);
-  /* Support only type promotion or signedess change.  Check that
USE_TYPE
-is not bigger than the original type.  */
+  /* Support only type demotion or signedess change.  */
   if (!INTEGRAL_TYPE_P (use_type)
-  || TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type)
- || TYPE_PRECISION (type) < TYPE_PRECISION (use_type))
+ || TYPE_PRECISION (type) <= TYPE_PRECISION (use_type))
 return NULL;

+  /* Check that NEW_TYPE is not bigger than the conversion result.  */
+  if (TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type))
+   return NULL;
+
   if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
   || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type))
 {



Re: [PATCH] Don't ICE in vectorizer when testing if a pattern stmt is used by another pattern stmt (PR tree-optimization/52073)

2012-02-01 Thread Ira Rosen


Jakub Jelinek  wrote on 01/02/2012 06:40:13 PM:


> Hi!
>

Hi,

> vinfo_for_stmt can't be used on stmts outside of the current loop.
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

Yes.

Thanks,
Ira

>
> 2012-02-01  Jakub Jelinek  
>
>PR tree-optimization/52073
>* tree-vect-stmts.c (vect_mark_relevant): When checking uses of
>a pattern stmt for pattern uses, ignore uses outside of the loop.
>
>* gcc.c-torture/compile/pr52073.c: New test.
>
> --- gcc/tree-vect-stmts.c.jj   2012-01-22 16:02:10.0 +0100
> +++ gcc/tree-vect-stmts.c   2012-02-01 10:33:58.847815421 +0100
> @@ -150,6 +150,8 @@ vect_mark_relevant (VEC(gimple,heap) **w
>use_operand_p use_p;
>gimple use_stmt;
>tree lhs;
> + loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
> + struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
>
>if (is_gimple_assign (stmt))
>  lhs = gimple_assign_lhs (stmt);
> @@ -166,6 +168,9 @@ vect_mark_relevant (VEC(gimple,heap) **w
>  continue;
>use_stmt = USE_STMT (use_p);
>
> +  if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
> +continue;
> +
>if (vinfo_for_stmt (use_stmt)
>&& STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
>  {
> --- gcc/testsuite/gcc.c-torture/compile/pr52073.c.jj   2012-02-01
> 10:39:13.041003562 +0100
> +++ gcc/testsuite/gcc.c-torture/compile/pr52073.c   2012-02-01 10:
> 38:51.0 +0100
> @@ -0,0 +1,28 @@
> +/* PR tree-optimization/52073 */
> +
> +int a, b, c, d, e, f;
> +
> +void
> +foo (int x)
> +{
> +  e = 1;
> +  for (;;)
> +{
> +  int g = c;
> +  if (x)
> +   {
> + if (e)
> +   continue;
> + while (a)
> +   --f;
> +   }
> +  else
> +   for (b = 5; b; b--)
> + {
> +   d = g;
> +   g = 0 == d;
> + }
> +  if (!g)
> +   x = 0;
> +}
> +}
>
>Jakub
>



[patch] Fix PR tree-optimization/52091

2012-02-04 Thread Ira Rosen

Hi,

The testcase in the PR fails when the vectorizer tries to create a
reduction init statement for a non-reduction use. This patch adds a check
that if operand's def stmt is a double reduction phi node, the use should
be a phi node too.

Bootstrapped and tested on powerpc64-suse-linux.
Committed.

Ira

ChangeLog:

PR tree-optimization/52091
* tree-vectorizer.h (vect_is_simple_use): Add an argument.
(vect_is_simple_use_1): Likewise.
* tree-vect-loop.c (vectorizable_reduction): Update calls
to vect_is_simple_use_1 and vect_is_simple_use.
(vectorizable_live_operation): Likewise.
* tree-vect-patterns.c (widened_name_p,
vect_recog_vector_vector_shift_pattern, check_bool_pattern):
Likewise.
* tree-vect-stmts.c (process_use, vect_get_vec_def_for_operand,
vectorizable_call, vectorizable_conversion,
vectorizable_assignment, vectorizable_shift,
vectorizable_operation, vectorizable_store, vectorizable_load):
Likewise.
(vect_is_simple_cond): Add an argument, pass it to
vect_is_simple_use_1.
(vectorizable_condition): Update calls to vect_is_simple_cond,
vect_is_simple_use.
(vect_is_simple_use): Add an argument, the statement in which
OPERAND is used.  Check that if OPERAND's def stmt is a double
reduction phi node, the use is a phi node too.
(vect_is_simple_use_1): Add an argument, pass it to
vect_is_simple_use.
* tree-vect-slp.c (vect_get_and_check_slp_defs): Update a call
to vect_is_simple_use.

testsuite/ChangeLog:

PR tree-optimization/52091
* gcc.dg/vect/pr52091.c: New test.

(See attached file: patch.txt)Index: ChangeLog
===
--- ChangeLog   (revision 183901)
+++ ChangeLog   (working copy)
@@ -1,3 +1,31 @@
+2012-02-05  Ira Rosen  
+
+   PR tree-optimization/52091
+   * tree-vectorizer.h (vect_is_simple_use): Add an argument.
+   (vect_is_simple_use_1): Likewise.
+   * tree-vect-loop.c (vectorizable_reduction): Update calls
+   to vect_is_simple_use_1 and vect_is_simple_use.
+   (vectorizable_live_operation): Likewise.
+   * tree-vect-patterns.c (widened_name_p,
+   vect_recog_vector_vector_shift_pattern, check_bool_pattern):
+   Likewise.
+   * tree-vect-stmts.c (process_use, vect_get_vec_def_for_operand,
+   vectorizable_call, vectorizable_conversion,
+   vectorizable_assignment, vectorizable_shift,
+   vectorizable_operation, vectorizable_store, vectorizable_load):
+   Likewise.
+   (vect_is_simple_cond): Add an argument, pass it to
+   vect_is_simple_use_1.
+   (vectorizable_condition): Update calls to vect_is_simple_cond,
+   vect_is_simple_use.
+   (vect_is_simple_use): Add an argument, the statement in which
+   OPERAND is used.  Check that if OPERAND's def stmt is a double
+   reduction phi node, the use is a phi node too.
+   (vect_is_simple_use_1): Add an argument, pass it to
+   vect_is_simple_use.
+   * tree-vect-slp.c (vect_get_and_check_slp_defs): Update a call
+   to vect_is_simple_use.
+
 2012-02-04  Jakub Jelinek  
 
PR rtl-optimization/52095
Index: testsuite/gcc.dg/vect/pr52091.c
===
--- testsuite/gcc.dg/vect/pr52091.c (revision 0)
+++ testsuite/gcc.dg/vect/pr52091.c (revision 0)
@@ -0,0 +1,31 @@
+/* { dg-require-effective-target vect_int } */
+
+/* PR tree-optimization/52091 */
+
+int b, c, d, f;
+unsigned h;
+extern void abort (void);
+
+int
+main ()
+{
+  d = -1;
+  h = 65;
+  asm volatile ("" : : : "memory");
+  for (f = 0; f < 4; f++)
+{
+  h &= (unsigned short) d;
+  for (b = 0; b <= 1; b++)
+{
+  c = 0;
+  d &= 1;
+}
+}
+  asm volatile ("" : : : "memory");
+  if (b != 2 || c != 0 || d != 1 || f != 4 || h != 1)
+abort ();
+  return 0;
+}
+
+/* { dg-final { cleanup-tree-dump "vect" } } */
+
Index: testsuite/ChangeLog
===
--- testsuite/ChangeLog (revision 183901)
+++ testsuite/ChangeLog (working copy)
@@ -1,3 +1,8 @@
+2012-02-05  Ira Rosen  
+
+   PR tree-optimization/52091
+   * gcc.dg/vect/pr52091.c: New test.
+
 2012-02-04  Jakub Jelinek  
 
PR rtl-optimization/52113
Index: tree-vectorizer.h
===
--- tree-vectorizer.h   (revision 183901)
+++ tree-vectorizer.h   (working copy)
@@ -1,5 +1,5 @@
 /* Vectorizer
-   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
+   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
Free Software Foundation, Inc.
Contributed by Dorit Naishlos 
 
@@ -808,9 +808,11 @@ extern bool vect_can_advan

[patch] [4.8] Support pattern recognition in SLP

2012-02-05 Thread Ira Rosen

Hi,

This patch adds support for pattern recognition in basic block SLP.

Bootstrapped and tested on powerpc64-suse-linux.

Ira

ChangeLog:

* tree-vectorizer.h (vect_pattern_recog): Add new argument.
* tree-vect-loop.c (vect_analyze_loop_2): Update call to
vect_pattern_recog.
* tree-vect-patterns.c (widened_name_p): Pass basic block
info to vect_is_simple_use.
(vect_recog_dot_prod_pattern): Fail for basic blocks.
(vect_recog_widen_sum_pattern): Likewise.
(vect_handle_widen_op_by_const): Support basic blocks.
(vect_operation_fits_smaller_type,
vect_recog_over_widening_pattern): Likewise.
(vect_recog_vector_vector_shift_pattern): Support basic blocks.
Update call to vect_is_simple_use.
(vect_recog_mixed_size_cond_pattern): Support basic blocks.
Add printing.
(check_bool_pattern): Add an argument, update call to
vect_is_simple_use and the recursive calls.
(vect_recog_bool_pattern): Update relevant function calls.
Add printing.
(vect_mark_pattern_stmts): Update calls to new_stmt_vec_info.
(vect_pattern_recog_1): Check for reduction only in loops.
(vect_pattern_recog): Add new argument.  Support basic blocks.
* tree-vect-stmts.c (vectorizable_conversion): Pass basic block
info to vect_is_simple_use_1.
* tree-vect-slp.c (vect_get_and_check_slp_defs): Support basic
blocks.
(vect_slp_analyze_bb_1): Call vect_pattern_recog.

testsuite/ChangeLog:

* gcc.dg/vect/bb-slp-pattern-1.c: New test.
* gcc.dg/vect/bb-slp-pattern-2.c: New test.

(See attached file: bb-patterns.txt)Index: testsuite/gcc.dg/vect/bb-slp-pattern-1.c
===
--- testsuite/gcc.dg/vect/bb-slp-pattern-1.c(revision 0)
+++ testsuite/gcc.dg/vect/bb-slp-pattern-1.c(revision 0)
@@ -0,0 +1,54 @@
+/* { dg-require-effective-target vect_int } */
+
+#include 
+#include "tree-vect.h"
+
+#define N 8
+
+unsigned short X[N];
+unsigned short Y[N];
+unsigned int result[N];
+
+/* unsigned short->unsigned int widening-mult.  */
+__attribute__ ((noinline, noclone)) void
+foo (void)
+{
+  result[0] = (unsigned int) (X[0] * Y[0]);
+  result[1] = (unsigned int) (X[1] * Y[1]);
+  result[2] = (unsigned int) (X[2] * Y[2]);
+  result[3] = (unsigned int) (X[3] * Y[3]);
+  result[4] = (unsigned int) (X[4] * Y[4]);
+  result[5] = (unsigned int) (X[5] * Y[5]);
+  result[6] = (unsigned int) (X[6] * Y[6]);
+  result[7] = (unsigned int) (X[7] * Y[7]);
+}
+
+int main (void)
+{
+  int i, tmp;
+
+  check_vect ();
+
+  for (i = 0; i < N; i++)
+{
+  X[i] = i;
+  Y[i] = 64-i;
+}
+
+  foo ();
+
+  for (i = 0; i < N; i++)
+{
+  __asm__ volatile ("");
+  tmp = X[i] * Y[i];
+  if (result[i] != tmp)
+abort ();
+}
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "slp" { 
target { vect_widen_mult_hi_to_si || vect_unpack } } } } */
+/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 
8 "slp" { target vect_widen_mult_hi_to_si_pattern } } } */
+/* { dg-final { scan-tree-dump-times "pattern recognized" 8 "slp" { target 
vect_widen_mult_hi_to_si_pattern } } } */
+/* { dg-final { cleanup-tree-dump "slp" } } */
Index: testsuite/gcc.dg/vect/bb-slp-pattern-2.c
===
--- testsuite/gcc.dg/vect/bb-slp-pattern-2.c(revision 0)
+++ testsuite/gcc.dg/vect/bb-slp-pattern-2.c(revision 0)
@@ -0,0 +1,52 @@
+/* { dg-require-effective-target vect_condition } */
+
+#include "tree-vect.h"
+
+#define N 128
+
+__attribute__((noinline, noclone)) void
+foo (short * __restrict__ a, int * __restrict__ b, int stride)
+{
+  int i;
+
+  for (i = 0; i < N/stride; i++, a += stride, b += stride)
+   {
+ a[0] = b[0] ? 1 : 7;
+ a[1] = b[1] ? 2 : 0;
+ a[2] = b[2] ? 3 : 0;
+ a[3] = b[3] ? 4 : 0;
+ a[4] = b[4] ? 5 : 0;
+ a[5] = b[5] ? 6 : 0;
+ a[6] = b[6] ? 7 : 0;
+ a[7] = b[7] ? 8 : 0;
+   }
+}
+
+short a[N];
+int b[N];
+int main ()
+{
+  int i;
+
+  check_vect ();
+
+  for (i = 0; i < N; i++)
+{
+  a[i] = i;
+  b[i] = -i;
+}
+
+  foo (a, b, 8);
+
+  for (i = 1; i < N; i++)
+if (a[i] != i%8 + 1)
+  abort ();
+
+  if (a[0] != 7)
+abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 
"slp" { target { vect_element_align && vect_pack_trunc } } } } */
+/* { dg-final { cleanup-tree-dump "slp" } } */
Index: tree-vectorizer.h
===
--- tree-vectorizer.h   (revision 183902)
+++ tree-vectorizer.h   (working copy)
@@ -933,7 +933,7 @@ extern void vect_slp_transform_bb (basic_block);
in the future.  */
 typedef gimple (* vect_recog_func_ptr) (VEC (gimple, heap) **, tree *, tree *);
 #define NUM_PATTERNS 10

[patch] [4.8] Mixed condition vect pattern for non-constants

2012-02-06 Thread Ira Rosen

Hi,

This patch enhances mixed condition pattern detection to work with
non-constant integral then and else clauses. It checks that 'then' and
'else' are results of type conversion from the comparison type to their
current type, and generates the whole cond_expr in comparison type
(ignoring the conversions).

Bootstrapped on powerpc64-suse-linux, tested on powerpc64-suse-linux and
x86_64-suse-linux.

Ira

ChangeLog:

* tree-vect-patterns.c (widened_name_p): Rename to ...
(type_conversion_p): ... this.  Add new argument to determine
if it's a promotion or demotion operation.  Check for
CONVERT_EXPR_CODE_P instead of NOP_EXPR.
(vect_recog_dot_prod_pattern): Call type_conversion_p instead
widened_name_p.
(vect_recog_widen_mult_pattern, vect_recog_widen_sum_pattern,
vect_operation_fits_smaller_type, vect_recog_widen_shift_pattern):
Likewise.
(vect_recog_mixed_size_cond_pattern): Likewise and allow
non-constant then and else clauses.

testsuite/ChangeLog:

* gcc.dg/vect/slp-cond-3.c: New test.
* gcc.dg/vect/slp-cond-4.c: New test.

(See attached file: mixed-cond.txt)Index: testsuite/gcc.dg/vect/slp-cond-3.c
===
--- testsuite/gcc.dg/vect/slp-cond-3.c  (revision 0)
+++ testsuite/gcc.dg/vect/slp-cond-3.c  (revision 0)
@@ -0,0 +1,84 @@
+/* { dg-require-effective-target vect_condition } */
+
+#include "tree-vect.h"
+
+#define N 128
+
+/* Comparison in int, then/else and result in unsigned char.  */
+
+static inline unsigned char
+foo (int x, int y, int a, int b)
+{
+  if (x >= y)
+return a;
+  else
+return b;
+}
+
+__attribute__((noinline, noclone)) void
+bar (unsigned char * __restrict__ a, unsigned char * __restrict__ b,
+ unsigned char * __restrict__ c, unsigned char * __restrict__ d,
+ unsigned char * __restrict__ e, int w)
+{
+  int i;
+  for (i = 0; i < N/16; i++, a += 16, b += 16, c += 16, d += 16, e += 16)
+{
+  e[0] = foo (c[0], d[0], a[0] * w, b[0] * w);
+  e[1] = foo (c[1], d[1], a[1] * w, b[1] * w);
+  e[2] = foo (c[2], d[2], a[2] * w, b[2] * w);
+  e[3] = foo (c[3], d[3], a[3] * w, b[3] * w);
+  e[4] = foo (c[4], d[4], a[4] * w, b[4] * w);
+  e[5] = foo (c[5], d[5], a[5] * w, b[5] * w);
+  e[6] = foo (c[6], d[6], a[6] * w, b[6] * w);
+  e[7] = foo (c[7], d[7], a[7] * w, b[7] * w);
+  e[8] = foo (c[8], d[8], a[8] * w, b[8] * w);
+  e[9] = foo (c[9], d[9], a[9] * w, b[9] * w);
+  e[10] = foo (c[10], d[10], a[10] * w, b[10] * w);
+  e[11] = foo (c[11], d[11], a[11] * w, b[11] * w);
+  e[12] = foo (c[12], d[12], a[12] * w, b[12] * w);
+  e[13] = foo (c[13], d[13], a[13] * w, b[13] * w);
+  e[14] = foo (c[14], d[14], a[14] * w, b[14] * w);
+  e[15] = foo (c[15], d[15], a[15] * w, b[15] * w);
+}
+}
+
+
+unsigned char a[N], b[N], c[N], d[N], e[N];
+
+int main ()
+{
+  int i;
+
+  check_vect ();
+
+  for (i = 0; i < N; i++)
+{
+  a[i] = i;
+  b[i] = 5;
+  e[i] = 0;
+
+  switch (i % 9)
+{
+case 0: asm (""); c[i] = i; d[i] = i + 1; break;
+case 1: c[i] = 0; d[i] = 0; break;
+case 2: c[i] = i + 1; d[i] = i - 1; break;
+case 3: c[i] = i; d[i] = i + 7; break;
+case 4: c[i] = i; d[i] = i; break;
+case 5: c[i] = i + 16; d[i] = i + 3; break;
+case 6: c[i] = i - 5; d[i] = i; break;
+case 7: c[i] = i; d[i] = i; break;
+case 8: c[i] = i; d[i] = i - 7; break;
+}
+}
+
+  bar (a, b, c, d, e, 2);
+  for (i = 0; i < N; i++)
+if (e[i] != ((i % 3) == 0 ? 10 : 2 * i))
+  abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" } 
} */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+
Index: testsuite/gcc.dg/vect/slp-cond-4.c
===
--- testsuite/gcc.dg/vect/slp-cond-4.c  (revision 0)
+++ testsuite/gcc.dg/vect/slp-cond-4.c  (revision 0)
@@ -0,0 +1,86 @@
+/* { dg-require-effective-target vect_condition } */
+
+#include "tree-vect.h"
+
+#define N 128
+
+/* Comparison in short, then/else and result in int.  */
+static inline int
+foo (short x, short y, int a, int b)
+{
+  if (x >= y)
+return a;
+  else
+return b;
+}
+
+__attribute__((noinline, noclone)) void
+bar (short * __restrict__ a, short * __restrict__ b,
+ short * __restrict__ c, short * __restrict__ d,
+ int * __restrict__ e, int w)
+{
+  int i;
+  int stride = 16;
+
+  for (i = 0; i < N/stride; i++, a += stride, b += stride, c += stride,
+d += stride, e += stride)
+{
+  e[0] = foo (c[0], d[0], a[0], b[0]);
+  e[1] = foo (c[1], d[1], a[1], b[1]);
+  e[2] = foo (c[2], d[2], a[2], b[2]);
+  e[3] = foo (c[3], d[3], a[3], b[3]);
+  e[4] = foo (c[4], d[4], a[4], b[4]);
+  e[5] = foo (c[5], d[5], a[5], b[5]);
+  e[6] = foo 

[committed] Remove myself as vectorizer maintainer

2012-02-07 Thread Ira Rosen

Hi,

I am starting to work on a new project and won't be able to continue with
vectorizer maintenance.

I'd like to thank all the people I had a chance to work with for making my
GCC experience so enjoyable.

All the best,
Ira


2012-02-08  Ira Rosen 

* MAINTAINERS (Various Maintainers): Remove myself as
auto-vectorizer
maintainer.

Index: MAINTAINERS
===
--- MAINTAINERS (revision 183967)
+++ MAINTAINERS (working copy)
@@ -240,7 +240,6 @@ RTL optimizers  Eric Botcazou
ebotcazou@libertysu
 RTL optimizers Richard Sandiford   rdsandif...@googlemail.com
 auto-vectorizerRichard Guentherrguent...@suse.de
 auto-vectorizerZdenek Dvorak   o...@ucw.cz
-auto-vectorizer        Ira Rosen   i...@il.ibm.com
 loop infrastructureZdenek Dvorak   o...@ucw.cz
 OpenMP Jakub Jelinek   ja...@redhat.com
 testsuite  Rainer Orth r...@cebitec.uni-bielefeld.de




[patch] Fix PR 49087 (was Re: Fix crash in vect_is_slp_reduction)

2011-05-22 Thread Ira Rosen


> >
> > No, we shouldn't arrive with a NULL use_stmt here.
>
> I think a proper fix will be to fail if there are no uses.
> I'll prepare a patch on Sunday.
>

Here is the patch. It bails out if LHS has no uses.

Bootstrapped and tested on powerpc64-suse-linux.
Committed.

Ira

ChangeLog:

PR tree-optimization/49087
* tree-vect-loop.c (vect_is_slp_reduction): Fail if
LHS has no uses.

testsuite/ChangeLog:

PR tree-optimization/49087
* gcc.dg/vect/O3-pr49087.c: New test.


Index: tree-vect-loop.c
===
--- tree-vect-loop.c(revision 174025)
+++ tree-vect-loop.c(working copy)
@@ -1704,7 +1704,7 @@ vect_is_slp_reduction (loop_vec_info loop_info, gi
   tree lhs;
   imm_use_iterator imm_iter;
   use_operand_p use_p;
-  int nloop_uses, size = 0;
+  int nloop_uses, size = 0, nuses;
   bool found = false;

   if (loop != vect_loop)
@@ -1715,9 +1715,11 @@ vect_is_slp_reduction (loop_vec_info loop_info, gi
   while (1)
 {
   nloop_uses = 0;
+  nuses = 0;
   FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
 {
   use_stmt = USE_STMT (use_p);
+  nuses++;
   if (is_gimple_debug (use_stmt))
 continue;

@@ -1739,6 +1741,10 @@ vect_is_slp_reduction (loop_vec_info loop_info, gi
 return false;
 }

+  /* We reached a statement with no uses.  */
+  if (nuses == 0)
+   return false;
+
   if (found)
 break;

Index: testsuite/gcc.dg/vect/O3-pr49087.c
===
--- testsuite/gcc.dg/vect/O3-pr49087.c  (revision 0)
+++ testsuite/gcc.dg/vect/O3-pr49087.c  (revision 0)
@@ -0,0 +1,37 @@
+/* { dg-do compile } */
+
+static char func2() { }
+
+struct S0
+{
+ int t;
+};
+
+int g;
+
+struct S0 s0;
+
+int
+foo (int arg)
+{
+  int *ptr = &g;
+  int i, j;
+  for (i = 0; i < 10; i += 1)
+{
+  for (j = 0; j < 1; j += 1)
+   {
+ int k;
+ if (arg)
+   {
+ int l;
+ for (k = 1; arg < 10; arg = func2 ())
+   {
+ return l;
+   }
+   }
+ *ptr = func2 () ^ s0.t;
+   }
+}
+}
+
+/* { dg-final { cleanup-tree-dump "vect" } } */



[patch] Fix PR tree-optimization/49038

2011-05-26 Thread Ira Rosen
Hi,

The vectorizer supports strided loads with gaps, e.g., when only a[4i]
and a[4i+2] are accessed, it generates a vector load a[4i:4i+3], i.e.,
creating an access to a[4i+3], which doesn't exist in the scalar code.
This access may be invalid, as described in the PR.

This patch creates an epilogue loop (with at least one iteration) for
such cases.

Bootstrapped and tested on powerpc64-suse-linux.
Applied to trunk. I'll prepare patches for 4.5 and 4.6 next week.

Ira


ChangeLog:

PR tree-optimization/49038
* tree-vect-loop-manip.c (vect_generate_tmps_on_preheader):
Ensure at least one epilogue iteration if required by data
accesses with gaps.
* tree-vectorizer.h (struct _loop_vec_info): Add new field
to mark loops that require peeling for gaps.
* tree-vect-loop.c (new_loop_vec_info): Initialize new field.
(vect_get_known_peeling_cost): Take peeling for gaps into
account.
(vect_transform_loop): Generate epilogue if required by data
access with gaps.
* tree-vect-data-refs.c (vect_analyze_group_access): Mark the
loop as requiring an epilogue if there are gaps in the end of
the strided group.

testsuite/ChangeLog:

PR tree-optimization/49038
* gcc.dg/vect/vect-strided-u8-i8-gap4-unknown.c: New test.
* gcc.dg/vect/pr49038.c: New test.
Index: tree-vect-loop-manip.c
===
--- tree-vect-loop-manip.c  (revision 174264)
+++ tree-vect-loop-manip.c  (working copy)
@@ -1551,7 +1551,7 @@ vect_generate_tmps_on_preheader (loop_vec_info loo
   edge pe;
   basic_block new_bb;
   gimple_seq stmts;
-  tree ni_name;
+  tree ni_name, ni_minus_gap_name;
   tree var;
   tree ratio_name;
   tree ratio_mult_vf_name;
@@ -1568,9 +1568,39 @@ vect_generate_tmps_on_preheader (loop_vec_info loo
   ni_name = vect_build_loop_niters (loop_vinfo, cond_expr_stmt_list);
   log_vf = build_int_cst (TREE_TYPE (ni), exact_log2 (vf));
 
+  /* If epilogue loop is required because of data accesses with gaps, we
+ subtract one iteration from the total number of iterations here for
+ correct calculation of RATIO.  */
+  if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
+{
+  ni_minus_gap_name = fold_build2 (MINUS_EXPR, TREE_TYPE (ni_name),
+  ni_name,
+  build_one_cst (TREE_TYPE (ni_name)));
+  if (!is_gimple_val (ni_minus_gap_name))
+   {
+ var = create_tmp_var (TREE_TYPE (ni), "ni_gap");
+  add_referenced_var (var);
+
+  stmts = NULL;
+  ni_minus_gap_name = force_gimple_operand (ni_minus_gap_name, &stmts,
+   true, var);
+  if (cond_expr_stmt_list)
+gimple_seq_add_seq (&cond_expr_stmt_list, stmts);
+  else
+{
+  pe = loop_preheader_edge (loop);
+  new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
+  gcc_assert (!new_bb);
+}
+}
+}
+  else
+ni_minus_gap_name = ni_name;
+
   /* Create: ratio = ni >> log2(vf) */
 
-  ratio_name = fold_build2 (RSHIFT_EXPR, TREE_TYPE (ni_name), ni_name, log_vf);
+  ratio_name = fold_build2 (RSHIFT_EXPR, TREE_TYPE (ni_minus_gap_name),
+   ni_minus_gap_name, log_vf);
   if (!is_gimple_val (ratio_name))
 {
   var = create_tmp_var (TREE_TYPE (ni), "bnd");
Index: testsuite/gcc.dg/vect/vect-strided-u8-i8-gap4-unknown.c
===
--- testsuite/gcc.dg/vect/vect-strided-u8-i8-gap4-unknown.c (revision 0)
+++ testsuite/gcc.dg/vect/vect-strided-u8-i8-gap4-unknown.c (revision 0)
@@ -0,0 +1,103 @@
+/* { dg-require-effective-target vect_int } */
+
+#include 
+#include 
+#include "tree-vect.h"
+
+#define N 160 
+
+typedef struct {
+   unsigned char a;
+   unsigned char b;
+   unsigned char c;
+   unsigned char d;
+   unsigned char e;
+   unsigned char f;
+   unsigned char g;
+   unsigned char h;
+} s;
+
+__attribute__ ((noinline)) int
+main1 (s *arr, int n)
+{
+  int i;
+  s *ptr = arr;
+  s res[N];
+  unsigned char x;
+
+  /* Check peeling for gaps for unknown loop bound.  */
+  for (i = 0; i < n; i++)
+{
+  res[i].c = ptr->b + ptr->c;
+  x = ptr->c + ptr->f;
+  res[i].a = x + ptr->b;
+  res[i].d = ptr->b + ptr->c;
+  res[i].b = ptr->c;
+  res[i].f = ptr->f + ptr->e;
+  res[i].e = ptr->b + ptr->e; 
+  res[i].h = ptr->c;   
+  res[i].g = ptr->b + ptr->c;
+  ptr++; 
+} 
+   
+  /* check results:  */
+  for (i = 0; i < n; i++)
+{ 
+  if (res[i].c != arr[i].b + arr[i].c
+  || res[i].a != arr[i].c + arr[i].f + arr[i].b
+  || res[i].d != arr[i].b + arr[i].c
+  || res[i].b != arr[i].c
+  || res[i].f != arr[i].f + arr[i].e
+  || res[i].e != arr[i].b + arr[i].e
+  || re

[patch] Fix PR testsuite/49222

2011-05-29 Thread Ira Rosen
Hi,

This patch uses MAP_ANON if MAP_ANONYMOUS is not defined fixing this
test's failure on x86_64-apple-darwin10.

Tested on x86_64-suse-linux and on x86_64-apple-darwin10 (by Dominique).
OK to apply?

Thanks,
Ira

testsuite/ChangeLog:

 PR testsuite/49222
 * gcc.dg/vect/pr49038.c: Use MAP_ANON if MAP_ANONYMOUS
 is not defined.

Index: testsuite/gcc.dg/vect/pr49038.c
===
--- testsuite/gcc.dg/vect/pr49038.c (revision 174393)
+++ testsuite/gcc.dg/vect/pr49038.c (working copy)
@@ -6,6 +6,10 @@
 #define ADDRESS 0x112200
 #define TYPE unsigned short

+#ifndef MAP_ANONYMOUS
+#define MAP_ANONYMOUS MAP_ANON
+#endif
+
 void __attribute__((noinline))
 foo (TYPE *__restrict a, TYPE *__restrict b)
 {


[patch] Fix PR49199 - ICE with SLP reduction

2011-05-30 Thread Ira Rosen
Hi,

For SLP reduction (unlike regular reduction) the operands are not
checked to be loop internal definitions. This patch adds such checks.

Bootstrapped and tested on powerpc64-suse-linux.
Committed.

Ira

ChangeLog:

PR tree-optimization/49199
* tree-vect-loop.c (vect_is_slp_reduction): Check that the
non-reduction operands are either defined in the loop or
by induction.

testsuite/ChangeLog:

PR tree-optimization/49199
* gcc.dg/vect/no-scevccp-pr49199.c: New test.
* gcc.dg/vect/vect.exp: Run no-scevccp-pr* tests with
-fno-tree-scev-cprop.
Index: ChangeLog
===
--- ChangeLog   (revision 174424)
+++ ChangeLog   (working copy)
@@ -1,3 +1,10 @@
+2011-05-30  Ira Rosen  
+
+   PR tree-optimization/49199
+   * tree-vect-loop.c (vect_is_slp_reduction): Check that the
+   non-reduction operands are either defined in the loop or
+   by induction.
+
 2011-05-29  Xinliang David Li  
 
* opts-global.c (handle_common_deferred_options): Handle new options.
Index: testsuite/gcc.dg/vect/no-scevccp-pr49199.c
===
--- testsuite/gcc.dg/vect/no-scevccp-pr49199.c  (revision 0)
+++ testsuite/gcc.dg/vect/no-scevccp-pr49199.c  (revision 0)
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_int } */
+
+int const_bar (void) __attribute__ ((__const__));
+int pure_bar (void) __attribute__ ((__pure__));
+
+int foo (void)
+{
+  int i = 0, x = 0;
+  for (; i < 100; i++)
+{
+   x += const_bar ();
+   x += pure_bar ();
+}
+  return x;
+}
+
+/* { dg-final { cleanup-tree-dump "vect" } } */
Index: testsuite/gcc.dg/vect/vect.exp
===
--- testsuite/gcc.dg/vect/vect.exp  (revision 174424)
+++ testsuite/gcc.dg/vect/vect.exp  (working copy)
@@ -176,6 +176,12 @@ dg-runtest [lsort [glob -nocomplain $srcdir/$subdi
 # -fno-tree-scev-cprop
 set DEFAULT_VECTCFLAGS $SAVED_DEFAULT_VECTCFLAGS
 lappend DEFAULT_VECTCFLAGS "-fno-tree-scev-cprop"
+dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/no-scevccp-pr*.\[cS\]]]  \
+"" $DEFAULT_VECTCFLAGS
+
+# -fno-tree-scev-cprop
+set DEFAULT_VECTCFLAGS $SAVED_DEFAULT_VECTCFLAGS
+lappend DEFAULT_VECTCFLAGS "-fno-tree-scev-cprop"
 dg-runtest [lsort [glob -nocomplain 
$srcdir/$subdir/no-scevccp-outer-*.\[cS\]]]  \
 "" $DEFAULT_VECTCFLAGS
 
Index: testsuite/ChangeLog
===
--- testsuite/ChangeLog (revision 174424)
+++ testsuite/ChangeLog (working copy)
@@ -1,3 +1,10 @@
+2011-05-30  Ira Rosen  
+
+   PR tree-optimization/49199
+   * gcc.dg/vect/no-scevccp-pr49199.c: New test.
+   * gcc.dg/vect/vect.exp: Run no-scevccp-pr* tests with
+   -fno-tree-scev-cprop.
+
 2011-05-29  Janus Weil  
 
PR fortran/47601
Index: tree-vect-loop.c
===
--- tree-vect-loop.c(revision 174424)
+++ tree-vect-loop.c(working copy)
@@ -1700,7 +1700,7 @@ vect_is_slp_reduction (loop_vec_info loop_info, gi
   struct loop *loop = (gimple_bb (phi))->loop_father;
   struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
   enum tree_code code;
-  gimple current_stmt = NULL, use_stmt = NULL, first;
+  gimple current_stmt = NULL, use_stmt = NULL, first, next_stmt;
   stmt_vec_info use_stmt_info, current_stmt_info;
   tree lhs;
   imm_use_iterator imm_iter;
@@ -1778,36 +1778,92 @@ vect_is_slp_reduction (loop_vec_info loop_info, gi
   if (!found || use_stmt != phi || size < 2)
 return false;
 
-  /* Save the chain for further analysis in SLP detection.  */
-  first = GROUP_FIRST_ELEMENT (vinfo_for_stmt (current_stmt));
-  VEC_safe_push (gimple, heap, LOOP_VINFO_REDUCTION_CHAINS (loop_info), first);
-  GROUP_SIZE (vinfo_for_stmt (first)) = size;
-
   /* Swap the operands, if needed, to make the reduction operand be the second
  operand.  */
   lhs = PHI_RESULT (phi);
-  current_stmt = first;
-  while (current_stmt)
+  next_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (current_stmt));
+  while (next_stmt)
 {
-  if (get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS
-  && gimple_assign_rhs2 (current_stmt) != lhs)
-{
-  if (vect_print_dump_info (REPORT_DETAILS))
-{
-  fprintf (vect_dump, "swapping oprnds: ");
-  print_gimple_stmt (vect_dump, current_stmt, 0, TDF_SLIM);
-}
+  if (get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS)
+   {
+  if (gimple_assign_rhs2 (next_stmt) == lhs)
+   {
+ tree op = gimple_assign_rhs1 (next_stmt);
+  gimple def_stmt = NULL;
 
-  swap_tree_operands (current_stmt,
- gimple_assign_r

  1   2   >