On Thu, 4 Apr 2013, Richard Biener wrote:

> 
> This fixes the testcase in PR56826, the underlying issue being
> that a SLP group load with a gap is detected as supported while
> it is not because we compute ncopies incorrectly.  The following
> patch makes us compute ncopies more correctly.

This leads to further errors and uncovers an old testcase
for the bug shown by PR56826 - it just wasn't a runtime
testcase and was cloaked as a missed optimization ...

Bootstrap and regtest running on x86_64-unknown-linux-gnu.

Richard.

2013-04-04  Richard Biener  <rguent...@suse.de>

        PR tree-optimization/56826
        * tree-vect-slp.c (vect_build_slp_tree): Compute ncopies
        more accurately.

        * gcc.dg/vect/pr56826.c: New testcase.
        * gcc.dg/vect/O3-pr36098.c: Adjust.

Index: gcc/tree-vect-slp.c
===================================================================
*** gcc/tree-vect-slp.c (revision 197480)
--- gcc/tree-vect-slp.c (working copy)
*************** vect_build_slp_tree (loop_vec_info loop_
*** 470,476 ****
    tree lhs;
    bool stop_recursion = false, need_same_oprnds = false;
    tree vectype, scalar_type, first_op1 = NULL_TREE;
-   unsigned int ncopies;
    optab optab;
    int icode;
    enum machine_mode optab_op2_mode;
--- 470,475 ----
*************** vect_build_slp_tree (loop_vec_info loop_
*** 577,584 ****
              vectorization_factor = *max_nunits;
          }
  
-       ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
- 
        if (is_gimple_call (stmt))
        {
          rhs_code = CALL_EXPR;
--- 576,581 ----
*************** vect_build_slp_tree (loop_vec_info loop_
*** 741,752 ****
          else
            {
              /* Load.  */
                /* FORNOW: Check that there is no gap between the loads
                 and no gap between the groups when we need to load
                 multiple groups at once.
                 ???  We should enhance this to only disallow gaps
                 inside vectors.  */
!               if ((ncopies > 1
                   && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
                   && GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
                  || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != stmt
--- 738,752 ----
          else
            {
              /* Load.  */
+             unsigned unrolling_factor
+               = least_common_multiple
+                   (*max_nunits, group_size) / group_size;
                /* FORNOW: Check that there is no gap between the loads
                 and no gap between the groups when we need to load
                 multiple groups at once.
                 ???  We should enhance this to only disallow gaps
                 inside vectors.  */
!               if ((unrolling_factor > 1
                   && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
                   && GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
                  || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != stmt
*************** vect_build_slp_tree (loop_vec_info loop_
*** 767,772 ****
--- 767,774 ----
  
                /* Check that the size of interleaved loads group is not
                   greater than the SLP group size.  */
+             unsigned ncopies
+               = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
                if (loop_vinfo
                  && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
                    && ((GROUP_SIZE (vinfo_for_stmt (stmt))
Index: gcc/testsuite/gcc.dg/vect/pr56826.c
===================================================================
*** gcc/testsuite/gcc.dg/vect/pr56826.c (revision 0)
--- gcc/testsuite/gcc.dg/vect/pr56826.c (working copy)
***************
*** 0 ****
--- 1,41 ----
+ extern void abort (void);
+ 
+ typedef struct {
+     int a[3];
+     int num;
+ } t1;
+ t1 B[100];
+ int A[300];
+ 
+ void __attribute__((noinline,noclone))
+ bar (int *A, t1 *B, int n)
+ {
+   int i;
+   int *a = A;
+   for (i=0; i<n; i++, a+=3)
+     {
+       a[0] = B[i].a[0];
+       a[1] = B[i].a[1];
+       a[2] = B[i].a[2];
+     }
+ }
+ 
+ int main()
+ {
+   int i;
+   for (i=0; i<100; i++) 
+     {
+       B[i].num = i;
+       B[i].a[0] = i * 3;
+       B[i].a[1] = i * 3 + 1;
+       B[i].a[2] = i * 3 + 2;
+       __asm__ volatile ("");
+     }
+   bar (&A[0], &B[0], 100);
+   for (i=0; i<300; i++)
+     if (A[i] != i)
+       abort ();
+   return 0;
+ } 
+ 
+ /* { dg-final { cleanup-tree-dump "vect" } } */

Property changes on: gcc/testsuite/gcc.dg/vect/pr56826.c
___________________________________________________________________
Added: svn:executable
## -0,0 +1 ##
+*
\ No newline at end of property
Index: gcc/testsuite/gcc.dg/vect/O3-pr36098.c
===================================================================
*** gcc/testsuite/gcc.dg/vect/O3-pr36098.c      (revision 197480)
--- gcc/testsuite/gcc.dg/vect/O3-pr36098.c      (working copy)
*************** void foo (int ncons, t_sortblock *sb, in
*** 17,21 ****
       iatom[m]=sb[i].iatom[m];
  }
  
! /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" } } */
  /* { dg-final { cleanup-tree-dump "vect" } } */
--- 17,21 ----
       iatom[m]=sb[i].iatom[m];
  }
  
! /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" } } */
  /* { dg-final { cleanup-tree-dump "vect" } } */

Reply via email to