This partly reverts, and partly reimplements, an earlier patch that added support for SLP reductions of SAD_EXPRs (gcc.dg/vect/slp-reduc-sad.c). Detecting those patterns unconditionally causes missed vectorization opportunities, because we do not implement vectorizing them outside of a reduction context.
Bootstrap / regtest pending on x86_64-unknown-linux-gnu. Richard. 2015-12-02 Richard Biener <rguent...@suse.de> PR tree-optimization/67800 PR tree-optimization/68333 * tree-vect-patterns.c (vect_recog_dot_prod_pattern): Restore restriction to reduction contexts but allow SLP reductions as well. (vect_recog_sad_pattern): Likewise. (vect_recog_widen_sum_pattern): Likewise. * gcc.target/i386/vect-pr67800.c: New testcase. Index: gcc/tree-vect-patterns.c =================================================================== --- gcc/tree-vect-patterns.c (revision 231167) +++ gcc/tree-vect-patterns.c (working copy) @@ -312,6 +312,9 @@ vect_recog_dot_prod_pattern (vec<gimple { gimple *def_stmt; + if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def + && ! STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_vinfo)) + return NULL; oprnd0 = gimple_assign_rhs1 (last_stmt); oprnd1 = gimple_assign_rhs2 (last_stmt); if (!types_compatible_p (TREE_TYPE (oprnd0), type) @@ -531,6 +534,9 @@ vect_recog_sad_pattern (vec<gimple *> *s { gimple *def_stmt; + if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def + && ! STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_vinfo)) + return NULL; plus_oprnd0 = gimple_assign_rhs1 (last_stmt); plus_oprnd1 = gimple_assign_rhs2 (last_stmt); if (!types_compatible_p (TREE_TYPE (plus_oprnd0), sum_type) @@ -1152,6 +1158,10 @@ vect_recog_widen_sum_pattern (vec<gimple if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR) return NULL; + if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def + && ! 
STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_vinfo)) + return NULL; + oprnd0 = gimple_assign_rhs1 (last_stmt); oprnd1 = gimple_assign_rhs2 (last_stmt); if (!types_compatible_p (TREE_TYPE (oprnd0), type) Index: gcc/testsuite/gcc.target/i386/vect-pr67800.c =================================================================== --- gcc/testsuite/gcc.target/i386/vect-pr67800.c (revision 0) +++ gcc/testsuite/gcc.target/i386/vect-pr67800.c (working copy) @@ -0,0 +1,42 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details" } */ +/* { dg-additional-options "-msse4.2" } */ + +#define ubyte unsigned char +#define byte char + +#define SCALE 8 + +#define R2Y (76) +#define G2Y (150) +#define B2Y (30) +#define R2I (127) +#define G2I (-59) +#define B2I (-68) +#define R2Q (51) +#define G2Q (-127) +#define B2Q (76) + +void +convert(ubyte *in, ubyte *out, unsigned n) +{ + ubyte r, g, b; + ubyte y = 0; + byte i, q; + + while (--n) { + r = *in++; + g = *in++; + b = *in++; + + y = (ubyte)(((R2Y * r) + (G2Y * g) + (B2Y * b) + (1 << (SCALE - 1))) >> SCALE); + i = (byte)(((R2I * r) + (G2I * g) + (B2I * b) + (1 << (SCALE - 1))) >> SCALE); + q = (byte)(((R2Q * r) + (G2Q * g) + (B2Q * b) + (1 << (SCALE - 1))) >> SCALE); + + *out++ = y; + *out++ = i; + *out++ = q; + } +} + +/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" } } */