Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk.

Richard.

2017-10-06  Richard Biener  <rguent...@suse.de>

        PR tree-optimization/82436
        * tree-vect-slp.c (vect_supported_load_permutation_p): More
        conservatively choose the vectorization factor when checking
        whether we can perform the required load permutation.
        (vect_transform_slp_perm_load): Assert when we may not fail.

        * gcc.dg/vect/pr82436.c: New testcase.

Index: gcc/tree-vect-slp.c
===================================================================
--- gcc/tree-vect-slp.c (revision 253439)
+++ gcc/tree-vect-slp.c (working copy)
@@ -1567,14 +1567,20 @@ vect_supported_load_permutation_p (slp_i
       return true;
     }
 
-  /* For loop vectorization verify we can generate the permutation.  */
+  /* For loop vectorization verify we can generate the permutation.  Be
+     conservative about the vectorization factor: there are permutations
+     that will use three vector inputs only starting from a specific factor,
+     and the vectorization factor is not yet final.
+     ???  The SLP instance unrolling factor might not be the maximum one.  */
   unsigned n_perms;
+  unsigned test_vf
+    = least_common_multiple (SLP_INSTANCE_UNROLLING_FACTOR (slp_instn),
+                            LOOP_VINFO_VECT_FACTOR
+                              (STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt))));
   FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
     if (node->load_permutation.exists ()
-       && !vect_transform_slp_perm_load
-             (node, vNULL, NULL,
-              SLP_INSTANCE_UNROLLING_FACTOR (slp_instn), slp_instn, true,
-              &n_perms))
+       && !vect_transform_slp_perm_load (node, vNULL, NULL, test_vf,
+                                         slp_instn, true, &n_perms))
       return false;
 
   return true;
@@ -3560,6 +3566,7 @@ vect_transform_slp_perm_load (slp_tree n
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                    stmt, 0);
                }
+             gcc_assert (analyze_only);
              return false;
            }
 
@@ -3583,6 +3590,7 @@ vect_transform_slp_perm_load (slp_tree n
                        dump_printf (MSG_MISSED_OPTIMIZATION, "%d ", mask[i]);
                      dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
                    }
+                 gcc_assert (analyze_only);
                  return false;
                }
 
Index: gcc/testsuite/gcc.dg/vect/pr82436.c
===================================================================
--- gcc/testsuite/gcc.dg/vect/pr82436.c (nonexistent)
+++ gcc/testsuite/gcc.dg/vect/pr82436.c (working copy)
@@ -0,0 +1,35 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-Ofast -fno-tree-scev-cprop" } */
+/* { dg-additional-options "-mavx2" { target { x86_64-*-* i?86-*-* } } } */
+
+struct reflection_type
+{
+  int h;
+  int k;
+  int l;
+  double f_exp;
+  double f_sigma;
+  _Complex double f_calc;
+  double f_pred;
+  double i_exp;
+  double i_sigma;
+  double i_pred;
+};
+
+double y, w;
+int foo (struct reflection_type *r, int n, unsigned s)
+{
+  int i;
+  y = 0;
+  w = 0;
+  for (i = 1; i < n; ++i)
+    {
+      struct reflection_type *x = &r[i*s];
+      double fpred = x->f_pred;
+      double fexp = x->f_exp;
+      double tem = (fpred - fexp);
+      y += __builtin_fabs (tem / x->f_sigma);
+      w += __builtin_fabs (tem / fexp);
+    }
+  return i;
+}

Reply via email to