Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk.

Richard.
2017-10-06 Richard Biener <rguent...@suse.de> PR tree-optimization/82436 * tree-vect-slp.c (vect_supported_load_permutation_p): More conservatively choose the vectorization factor when checking whether we can perform the required load permutation. (vect_transform_slp_perm_load): Assert when we may not fail. * gcc.dg/vect/pr82436.c: New testcase. Index: gcc/tree-vect-slp.c =================================================================== --- gcc/tree-vect-slp.c (revision 253439) +++ gcc/tree-vect-slp.c (working copy) @@ -1567,14 +1567,20 @@ vect_supported_load_permutation_p (slp_i return true; } - /* For loop vectorization verify we can generate the permutation. */ + /* For loop vectorization verify we can generate the permutation. Be + conservative about the vectorization factor, there are permutations + that will use three vector inputs only starting from a specific factor + and the vectorization factor is not yet final. + ??? The SLP instance unrolling factor might not be the maximum one. 
*/ unsigned n_perms; + unsigned test_vf + = least_common_multiple (SLP_INSTANCE_UNROLLING_FACTOR (slp_instn), + LOOP_VINFO_VECT_FACTOR + (STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt)))); FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node) if (node->load_permutation.exists () - && !vect_transform_slp_perm_load - (node, vNULL, NULL, - SLP_INSTANCE_UNROLLING_FACTOR (slp_instn), slp_instn, true, - &n_perms)) + && !vect_transform_slp_perm_load (node, vNULL, NULL, test_vf, + slp_instn, true, &n_perms)) return false; return true; @@ -3560,6 +3566,7 @@ vect_transform_slp_perm_load (slp_tree n dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0); } + gcc_assert (analyze_only); return false; } @@ -3583,6 +3590,7 @@ vect_transform_slp_perm_load (slp_tree n dump_printf (MSG_MISSED_OPTIMIZATION, "%d ", mask[i]); dump_printf (MSG_MISSED_OPTIMIZATION, "}\n"); } + gcc_assert (analyze_only); return false; } Index: gcc/testsuite/gcc.dg/vect/pr82436.c =================================================================== --- gcc/testsuite/gcc.dg/vect/pr82436.c (nonexistent) +++ gcc/testsuite/gcc.dg/vect/pr82436.c (working copy) @@ -0,0 +1,35 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-Ofast -fno-tree-scev-cprop" } */ +/* { dg-additional-options "-mavx2" { target { x86_64-*-* i?86-*-* } } } */ + +struct reflection_type +{ + int h; + int k; + int l; + double f_exp; + double f_sigma; + _Complex double f_calc; + double f_pred; + double i_exp; + double i_sigma; + double i_pred; +}; + +double y, w; +int foo (struct reflection_type *r, int n, unsigned s) +{ + int i; + y = 0; + w = 0; + for (i = 1; i < n; ++i) + { + struct reflection_type *x = &r[i*s]; + double fpred = x->f_pred; + double fexp = x->f_exp; + double tem = (fpred - fexp); + y += __builtin_fabs (tem / x->f_sigma); + w += __builtin_fabs (tem / fexp); + } + return i; +}