This fixes an oversight in the handling of strided permuted SLP loads,
where the number of required loads was miscomputed.

Bootstrapped and tested on x86_64-unknown-linux-gnu, applied.

Richard.

2016-01-12  Richard Biener  <rguent...@suse.de>

        PR tree-optimization/69174
        * tree-vect-stmts.c (vect_mark_relevant): Also dump the stmt
        being marked.
        (vectorizable_load): Properly compute the number of loads needed
        for permuted strided SLP loads and do not spuriously assign
        to SLP_TREE_VEC_STMTS.

        * gcc.dg/torture/pr69174.c: New testcase.

Index: gcc/tree-vect-stmts.c
===================================================================
*** gcc/tree-vect-stmts.c       (revision 232213)
--- gcc/tree-vect-stmts.c       (working copy)
*************** vect_mark_relevant (vec<gimple *> *workl
*** 190,197 ****
    gimple *pattern_stmt;
  
    if (dump_enabled_p ())
!     dump_printf_loc (MSG_NOTE, vect_location,
!                      "mark relevant %d, live %d.\n", relevant, live_p);
  
    /* If this stmt is an original stmt in a pattern, we might need to mark its
       related pattern stmt instead of the original stmt.  However, such stmts
--- 190,200 ----
    gimple *pattern_stmt;
  
    if (dump_enabled_p ())
!     {
!       dump_printf_loc (MSG_NOTE, vect_location,
!                      "mark relevant %d, live %d: ", relevant, live_p);
!       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
!     }
  
    /* If this stmt is an original stmt in a pattern, we might need to mark its
       related pattern stmt instead of the original stmt.  However, such stmts
*************** vectorizable_load (gimple *stmt, gimple_
*** 6748,6756 ****
          else
            ltype = vectype;
          ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
!         ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
          if (slp_perm)
!           dr_chain.create (ncopies);
        }
        for (j = 0; j < ncopies; j++)
        {
--- 6751,6766 ----
          else
            ltype = vectype;
          ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
!         /* For SLP permutation support we need to load the whole group,
!            not only the number of vector stmts the permutation result
!            fits in.  */
          if (slp_perm)
!           {
!             ncopies = (group_size * vf + nunits - 1) / nunits;
!             dr_chain.create (ncopies);
!           }
!         else
!           ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
        }
        for (j = 0; j < ncopies; j++)
        {
*************** vectorizable_load (gimple *stmt, gimple_
*** 6798,6806 ****
  
          if (slp)
            {
-             SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
              if (slp_perm)
                dr_chain.quick_push (gimple_assign_lhs (new_stmt));
            }
          else
            {
--- 6808,6817 ----
  
          if (slp)
            {
              if (slp_perm)
                dr_chain.quick_push (gimple_assign_lhs (new_stmt));
+             else
+               SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
            }
          else
            {
Index: gcc/testsuite/gcc.dg/torture/pr69174.c
===================================================================
*** gcc/testsuite/gcc.dg/torture/pr69174.c      (revision 0)
--- gcc/testsuite/gcc.dg/torture/pr69174.c      (working copy)
***************
*** 0 ****
--- 1,19 ----
+ /* { dg-do compile } */
+ 
+ typedef int pixval;
+ typedef struct { pixval r, g, b; } xel;
+ int convertRow_sample, convertRaster_col;
+ short *convertRow_samplebuf;
+ xel *convertRow_xelrow;
+ short convertRow_spp;
+ void fn1() {
+     int *alpharow;
+     for (; convertRaster_col;
+        ++convertRaster_col, convertRow_sample += convertRow_spp) {
+       convertRow_xelrow[convertRaster_col].r =
+           convertRow_xelrow[convertRaster_col].g =
+           convertRow_xelrow[convertRaster_col].b =
+           convertRow_samplebuf[convertRow_sample];
+       alpharow[convertRaster_col] = convertRow_samplebuf[convertRow_sample + 3];
+     }
+ }

Reply via email to