On Thu, May 15, 2025 at 10:04 AM liuhongt <hongtao....@intel.com> wrote:
>
> 1) Optimize (a >> 1) + (b >> 1) + ((a | b) & 1) to .AVG_CEIL (a, b)
> 2) Optimize (a | b) - ((a ^ b) >> 1) to .AVG_CEIL (a, b)
>
> Proof is at https://gcc.gnu.org/bugzilla/show_bug.cgi?id=118994#c6
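>
> For reference, a minimal scalar sketch (illustrative only, not part of
> the patch) that exhaustively checks both identities against the
> rounding-up average for 8-bit unsigned values:
>
>   #include <assert.h>
>
>   int
>   main (void)
>   {
>     for (unsigned a = 0; a < 256; a++)
>       for (unsigned b = 0; b < 256; b++)
>         {
>           /* Reference: rounding-up average, no overflow at this width.  */
>           unsigned ref = (a + b + 1) >> 1;
>           unsigned f1 = (a >> 1) + (b >> 1) + ((a | b) & 1);
>           unsigned f2 = (a | b) - ((a ^ b) >> 1);
>           assert (f1 == ref && f2 == ref);
>         }
>     return 0;
>   }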
>
> Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
> Ok for trunk?

OK.

Thanks,
Richard.

> gcc/ChangeLog:
>
>         PR middle-end/118994
>         * match.pd ((a >> 1) + (b >> 1) + ((a | b) & 1) to
>         .AVG_CEIL (a, b)): New pattern.
>         ((a | b) - ((a ^ b) >> 1) to .AVG_CEIL (a, b)): Ditto.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/i386/pr118994-1.c: New test.
>         * gcc.target/i386/pr118994-2.c: New test.
> ---
>  gcc/match.pd                               | 23 ++++++++++++++
>  gcc/testsuite/gcc.target/i386/pr118994-1.c | 37 ++++++++++++++++++++++
>  gcc/testsuite/gcc.target/i386/pr118994-2.c | 37 ++++++++++++++++++++++
>  3 files changed, 97 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr118994-1.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr118994-2.c
>
> diff --git a/gcc/match.pd b/gcc/match.pd
> index 96136404f5e..d391ac86edc 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -11455,3 +11455,26 @@ and,
>        }
>        (if (full_perm_p)
>         (vec_perm (op@3 @0 @1) @3 @2))))))
> +
> +#if GIMPLE
> +/* Simplify (a >> 1) + (b >> 1) + ((a | b) & 1) to .AVG_CEIL (a, b).
> +   Similar for (a | b) - ((a ^ b) >> 1).  */
> +
> +(simplify
> +  (plus:c
> +    (plus (rshift @0 integer_onep@1) (rshift @2 @1))
> +    (bit_and (bit_ior @0 @2) integer_onep@3))
> +  (if (cfun && (cfun->curr_properties & PROP_last_full_fold) != 0
> +      && VECTOR_TYPE_P (type)
> +      && direct_internal_fn_supported_p (IFN_AVG_CEIL, type, OPTIMIZE_FOR_BOTH))
> +      (IFN_AVG_CEIL @0 @2)))
> +
> +(simplify
> +  (minus
> +    (bit_ior @0 @2)
> +    (rshift (bit_xor @0 @2) integer_onep@1))
> +  (if (cfun && (cfun->curr_properties & PROP_last_full_fold) != 0
> +      && VECTOR_TYPE_P (type)
> +      && direct_internal_fn_supported_p (IFN_AVG_CEIL, type, OPTIMIZE_FOR_BOTH))
> +      (IFN_AVG_CEIL @0 @2)))
> +#endif
> diff --git a/gcc/testsuite/gcc.target/i386/pr118994-1.c b/gcc/testsuite/gcc.target/i386/pr118994-1.c
> new file mode 100644
> index 00000000000..5f40ababccc
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr118994-1.c
> @@ -0,0 +1,37 @@
> +/* { dg-do compile } */
> +/* { dg-options "-mavx512bw -mavx512vl -O2 -fdump-tree-optimized" } */
> +/* { dg-final { scan-tree-dump-times "\.AVG_CEIL" 6 "optimized"} } */
> +
> +#define VecRoundingAvg(a, b) ((a >> 1) + (b >> 1) + ((a | b) & 1))
> +
> +typedef unsigned char GccU8x16Vec __attribute__((__vector_size__(16)));
> +typedef unsigned short GccU16x8Vec __attribute__((__vector_size__(16)));
> +typedef unsigned char GccU8x32Vec __attribute__((__vector_size__(32)));
> +typedef unsigned short GccU16x16Vec __attribute__((__vector_size__(32)));
> +typedef unsigned char GccU8x64Vec __attribute__((__vector_size__(64)));
> +typedef unsigned short GccU16x32Vec __attribute__((__vector_size__(64)));
> +
> +GccU8x16Vec U8x16VecRoundingAvg(GccU8x16Vec a, GccU8x16Vec b) {
> +  return VecRoundingAvg(a, b);
> +}
> +
> +GccU16x8Vec U16x8VecRoundingAvg(GccU16x8Vec a, GccU16x8Vec b) {
> +  return VecRoundingAvg(a, b);
> +}
> +
> +GccU8x32Vec U8x32VecRoundingAvg(GccU8x32Vec a, GccU8x32Vec b) {
> +  return VecRoundingAvg(a, b);
> +}
> +
> +GccU16x16Vec U16x16VecRoundingAvg(GccU16x16Vec a, GccU16x16Vec b) {
> +  return VecRoundingAvg(a, b);
> +}
> +
> +GccU8x64Vec U8x64VecRoundingAvg(GccU8x64Vec a, GccU8x64Vec b) {
> +  return VecRoundingAvg(a, b);
> +}
> +
> +GccU16x32Vec U16x32VecRoundingAvg(GccU16x32Vec a, GccU16x32Vec b) {
> +  return VecRoundingAvg(a, b);
> +}
> +
> diff --git a/gcc/testsuite/gcc.target/i386/pr118994-2.c b/gcc/testsuite/gcc.target/i386/pr118994-2.c
> new file mode 100644
> index 00000000000..ba90e0a2992
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr118994-2.c
> @@ -0,0 +1,37 @@
> +/* { dg-do compile } */
> +/* { dg-options "-mavx512bw -mavx512vl -O2 -fdump-tree-optimized" } */
> +/* { dg-final { scan-tree-dump-times "\.AVG_CEIL" 6 "optimized"} } */
> +
> +#define VecRoundingAvg(a, b) ((a | b) - ((a ^ b) >> 1))
> +
> +typedef unsigned char GccU8x16Vec __attribute__((__vector_size__(16)));
> +typedef unsigned short GccU16x8Vec __attribute__((__vector_size__(16)));
> +typedef unsigned char GccU8x32Vec __attribute__((__vector_size__(32)));
> +typedef unsigned short GccU16x16Vec __attribute__((__vector_size__(32)));
> +typedef unsigned char GccU8x64Vec __attribute__((__vector_size__(64)));
> +typedef unsigned short GccU16x32Vec __attribute__((__vector_size__(64)));
> +
> +GccU8x16Vec U8x16VecRoundingAvg(GccU8x16Vec a, GccU8x16Vec b) {
> +  return VecRoundingAvg(a, b);
> +}
> +
> +GccU16x8Vec U16x8VecRoundingAvg(GccU16x8Vec a, GccU16x8Vec b) {
> +  return VecRoundingAvg(a, b);
> +}
> +
> +GccU8x32Vec U8x32VecRoundingAvg(GccU8x32Vec a, GccU8x32Vec b) {
> +  return VecRoundingAvg(a, b);
> +}
> +
> +GccU16x16Vec U16x16VecRoundingAvg(GccU16x16Vec a, GccU16x16Vec b) {
> +  return VecRoundingAvg(a, b);
> +}
> +
> +GccU8x64Vec U8x64VecRoundingAvg(GccU8x64Vec a, GccU8x64Vec b) {
> +  return VecRoundingAvg(a, b);
> +}
> +
> +GccU16x32Vec U16x32VecRoundingAvg(GccU16x32Vec a, GccU16x32Vec b) {
> +  return VecRoundingAvg(a, b);
> +}
> +
> --
> 2.34.1
>
