This fixes an omission: the required precision, which is computed in bits, was not being promoted to a vector element precision.
Bootstrapped & tested on x86_64-unknown-linux-gnu, pushed.

2021-04-01 Richard Biener <rguent...@suse.de>

	PR tree-optimization/99856
	* tree-vect-patterns.c (vect_recog_over_widening_pattern): Promote
	precision to vector element precision.

	* gcc.dg/vect/pr99856.c: New testcase.
---
 gcc/testsuite/gcc.dg/vect/pr99856.c | 33 +++++++++++++++++++++++++++++
 gcc/tree-vect-patterns.c | 1 +
 2 files changed, 34 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr99856.c

diff --git a/gcc/testsuite/gcc.dg/vect/pr99856.c b/gcc/testsuite/gcc.dg/vect/pr99856.c
new file mode 100644
index 00000000000..e5d2a45be57
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr99856.c
@@ -0,0 +1,33 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_unpack } */
+/* { dg-require-effective-target vect_pack_trunc } */
+
+#define SHIFTFORDIV255(a)\
+    ((((a) >> 8) + a) >> 8)
+
+#define DIV255(a)\
+    SHIFTFORDIV255(a + 0x80)
+
+typedef unsigned char uint8_t;
+
+void
+opSourceOver_premul(uint8_t* restrict Rrgba,
+                    const uint8_t* restrict Srgba,
+                    const uint8_t* restrict Drgba, int len)
+{
+  Rrgba = __builtin_assume_aligned (Rrgba, __BIGGEST_ALIGNMENT__);
+  Srgba = __builtin_assume_aligned (Rrgba, __BIGGEST_ALIGNMENT__);
+  Drgba = __builtin_assume_aligned (Rrgba, __BIGGEST_ALIGNMENT__);
+  int i = 0;
+  for (; i < len*4; i += 4)
+    {
+      uint8_t Sa = Srgba[i + 3];
+      Rrgba[i + 0] = DIV255(Srgba[i + 0] * 255 + Drgba[i + 0] * (255 - Sa));
+      Rrgba[i + 1] = DIV255(Srgba[i + 1] * 255 + Drgba[i + 1] * (255 - Sa));
+      Rrgba[i + 2] = DIV255(Srgba[i + 2] * 255 + Drgba[i + 2] * (255 - Sa));
+      Rrgba[i + 3] = DIV255(Srgba[i + 3] * 255 + Drgba[i + 3] * (255 - Sa));
+    }
+}
+
+/* { dg-final { scan-tree-dump "vectorized 1 loops in function" "vect" } } */
diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c
index b575b456301..803de3fc287 100644
--- a/gcc/tree-vect-patterns.c
+++ b/gcc/tree-vect-patterns.c
@@ -1705,6 +1705,7 @@ vect_recog_over_widening_pattern (vec_info *vinfo,
   /* Apply the minimum efficient precision we just calculated. */
   if (new_precision < min_precision)
     new_precision = min_precision;
+  new_precision = vect_element_precision (new_precision);
   if (new_precision >= TYPE_PRECISION (type))
     return NULL;
 
-- 
2.26.2