https://gcc.gnu.org/g:72f0b446d2c03866ebe4cae125e32fef598e924d
commit r16-767-g72f0b446d2c03866ebe4cae125e32fef598e924d Author: liuhongt <hongtao....@intel.com> Date: Tue Feb 25 22:48:27 2025 -0800 Add pattern match in match.pd for .AVG_CEIL 1) Optimize (a >> 1) + (b >> 1) + ((a | b) & 1) to .AVG_CEIL (a, b) 2) Optimize (a | b) - ((a ^ b) >> 1) to .AVG_CEIL (a, b) gcc/ChangeLog: PR middle-end/118994 * match.pd ((a >> 1) + (b >> 1) + ((a | b) & 1) to .AVG_CEIL (a, b)): New pattern. ((a | b) - ((a ^ b) >> 1) to .AVG_CEIL (a, b)): Ditto. gcc/testsuite/ChangeLog: * gcc.target/i386/pr118994-1.c: New test. * gcc.target/i386/pr118994-2.c: New test. Diff: --- gcc/match.pd | 23 +++++++++++++++++++ gcc/testsuite/gcc.target/i386/pr118994-1.c | 37 ++++++++++++++++++++++++++++++ gcc/testsuite/gcc.target/i386/pr118994-2.c | 37 ++++++++++++++++++++++++++++++ 3 files changed, 97 insertions(+) diff --git a/gcc/match.pd b/gcc/match.pd index 3b827df4cd6c..27f662f9714b 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -11486,3 +11486,26 @@ and, } (if (full_perm_p) (vec_perm (op@3 @0 @1) @3 @2)))))) + +#if GIMPLE +/* Simplify (a >> 1) + (b >> 1) + ((a | b) & 1) to .AVG_CEIL (a, b). + Similar for (a | b) - ((a ^ b) >> 1). */ + +(simplify + (plus:c + (plus (rshift @0 integer_onep@1) (rshift @2 @1)) + (bit_and (bit_ior @0 @2) integer_onep@3)) + (if (cfun && (cfun->curr_properties & PROP_last_full_fold) != 0 + && VECTOR_TYPE_P (type) + && direct_internal_fn_supported_p (IFN_AVG_CEIL, type, OPTIMIZE_FOR_BOTH)) + (IFN_AVG_CEIL @0 @2))) + +(simplify + (minus + (bit_ior @0 @2) + (rshift (bit_xor @0 @2) integer_onep@1)) + (if (cfun && (cfun->curr_properties & PROP_last_full_fold) != 0 + && VECTOR_TYPE_P (type) + && direct_internal_fn_supported_p (IFN_AVG_CEIL, type, OPTIMIZE_FOR_BOTH)) + (IFN_AVG_CEIL @0 @2))) +#endif diff --git a/gcc/testsuite/gcc.target/i386/pr118994-1.c b/gcc/testsuite/gcc.target/i386/pr118994-1.c new file mode 100644 index 000000000000..5f40ababcccf --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr118994-1.c @@ -0,0 +1,37 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512bw -mavx512vl -O2 -fdump-tree-optimized" } */ +/* { dg-final { scan-tree-dump-times "\.AVG_CEIL" 6 "optimized"} } */ + +#define VecRoundingAvg(a, b) ((a >> 1) + (b >> 1) + ((a | b) & 1)) + +typedef unsigned char GccU8x16Vec __attribute__((__vector_size__(16))); +typedef unsigned short GccU16x8Vec __attribute__((__vector_size__(16))); +typedef unsigned char GccU8x32Vec __attribute__((__vector_size__(32))); +typedef unsigned short GccU16x16Vec __attribute__((__vector_size__(32))); +typedef unsigned char GccU8x64Vec __attribute__((__vector_size__(64))); +typedef unsigned short GccU16x32Vec __attribute__((__vector_size__(64))); + +GccU8x16Vec U8x16VecRoundingAvg(GccU8x16Vec a, GccU8x16Vec b) { + return VecRoundingAvg(a, b); +} + +GccU16x8Vec U16x8VecRoundingAvg(GccU16x8Vec a, GccU16x8Vec b) { + return VecRoundingAvg(a, b); +} + +GccU8x32Vec U8x32VecRoundingAvg(GccU8x32Vec a, GccU8x32Vec b) { + return VecRoundingAvg(a, b); +} + +GccU16x16Vec U16x16VecRoundingAvg(GccU16x16Vec a, GccU16x16Vec b) { + return VecRoundingAvg(a, b); +} + +GccU8x64Vec U8x64VecRoundingAvg(GccU8x64Vec a, GccU8x64Vec b) { + return VecRoundingAvg(a, b); +} + +GccU16x32Vec U16x32VecRoundingAvg(GccU16x32Vec a, GccU16x32Vec b) { + return VecRoundingAvg(a, b); +} + diff --git a/gcc/testsuite/gcc.target/i386/pr118994-2.c b/gcc/testsuite/gcc.target/i386/pr118994-2.c new file mode 100644 index 000000000000..ba90e0a2992a --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr118994-2.c @@ -0,0 +1,37 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512bw -mavx512vl -O2 -fdump-tree-optimized" } */ +/* { dg-final { scan-tree-dump-times "\.AVG_CEIL" 6 "optimized"} } */ + +#define VecRoundingAvg(a, b) ((a | b) - ((a ^ b) >> 1)) + +typedef unsigned char GccU8x16Vec __attribute__((__vector_size__(16))); +typedef unsigned short GccU16x8Vec __attribute__((__vector_size__(16))); +typedef unsigned char GccU8x32Vec __attribute__((__vector_size__(32))); +typedef unsigned short GccU16x16Vec __attribute__((__vector_size__(32))); +typedef unsigned char GccU8x64Vec __attribute__((__vector_size__(64))); +typedef unsigned short GccU16x32Vec __attribute__((__vector_size__(64))); + +GccU8x16Vec U8x16VecRoundingAvg(GccU8x16Vec a, GccU8x16Vec b) { + return VecRoundingAvg(a, b); +} + +GccU16x8Vec U16x8VecRoundingAvg(GccU16x8Vec a, GccU16x8Vec b) { + return VecRoundingAvg(a, b); +} + +GccU8x32Vec U8x32VecRoundingAvg(GccU8x32Vec a, GccU8x32Vec b) { + return VecRoundingAvg(a, b); +} + +GccU16x16Vec U16x16VecRoundingAvg(GccU16x16Vec a, GccU16x16Vec b) { + return VecRoundingAvg(a, b); +} + +GccU8x64Vec U8x64VecRoundingAvg(GccU8x64Vec a, GccU8x64Vec b) { + return VecRoundingAvg(a, b); +} + +GccU16x32Vec U16x32VecRoundingAvg(GccU16x32Vec a, GccU16x32Vec b) { + return VecRoundingAvg(a, b); +} +