Hi, an rtx like (vec_select:V2SI (subreg:V4SI (inner:V2SI) 0) (parallel [(const_int 0) (const_int 1)])) can be simplified to inner.
Bootstrap is OK, and regression testing on the i386 backend is OK. gcc/ChangeLog PR rtl-optimization/97249 * simplify-rtx.c (simplify_binary_operation_1): Simplify vec_select of paradoxical subreg. gcc/testsuite/ChangeLog * gcc.target/i386/pr97249-1.c: New test. -- BR, Hongtao
From c00369aa36d2e169b59287c58872c915953dd2a2 Mon Sep 17 00:00:00 2001 From: liuhongt <hongtao....@intel.com> Date: Tue, 13 Oct 2020 15:35:29 +0800 Subject: [PATCH] Simplify vec_select of paradoxical subreg. For rtx like (vec_select:V2SI (subreg:V4SI (inner:V2SI) 0) (parallel [(const_int 0) (const_int 1)])) it could be simplified as inner. gcc/ChangeLog PR rtl-optimization/97249 * simplify-rtx.c (simplify_binary_operation_1): Simplify vec_select of paradoxical subreg. gcc/testsuite/ChangeLog * gcc.target/i386/pr97249-1.c: New test. --- gcc/simplify-rtx.c | 27 ++++++++++++++++++++ gcc/testsuite/gcc.target/i386/pr97249-1.c | 30 +++++++++++++++++++++++ 2 files changed, 57 insertions(+) create mode 100644 gcc/testsuite/gcc.target/i386/pr97249-1.c diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c index 869f0d11b2e..9c397157f28 100644 --- a/gcc/simplify-rtx.c +++ b/gcc/simplify-rtx.c @@ -4170,6 +4170,33 @@ simplify_binary_operation_1 (enum rtx_code code, machine_mode mode, return subop1; } } + + /* For cases like + (vec_select:V2SI (subreg:V4SI (inner:V2SI) 0) + (parallel [(const_int 0) (const_int 1)])). + return inner directly. */ + if (GET_CODE (trueop0) == SUBREG + && paradoxical_subreg_p (trueop0) + && mode == GET_MODE (XEXP (trueop0, 0)) + && (GET_MODE_NUNITS (GET_MODE (trueop0))).is_constant (&l0) + && (GET_MODE_NUNITS (mode)).is_constant (&l1) + && l0 % l1 == 0) + { + gcc_assert (known_eq (XVECLEN (trueop1, 0), l1)); + unsigned HOST_WIDE_INT expect = (HOST_WIDE_INT_1U << l1) - 1; + unsigned HOST_WIDE_INT sel = 0; + int i = 0; + for (;i != l1; i++) + { + rtx j = XVECEXP (trueop1, 0, i); + if (!CONST_INT_P (j)) + break; + sel |= HOST_WIDE_INT_1U << UINTVAL (j); + } + /* ??? Need to simplify XEXP (trueop0, 0) here. 
*/ + if (sel == expect) + return XEXP (trueop0, 0); + } } if (XVECLEN (trueop1, 0) == 1 diff --git a/gcc/testsuite/gcc.target/i386/pr97249-1.c b/gcc/testsuite/gcc.target/i386/pr97249-1.c new file mode 100644 index 00000000000..bc34aa8baa6 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr97249-1.c @@ -0,0 +1,30 @@ +/* PR target/97249 */ +/* { dg-do compile } */ +/* { dg-options "-mavx2 -O3 -masm=att" } */ +/* { dg-final { scan-assembler-times "vpmovzxbw\[ \t\]+\\\(\[^\n\]*%xmm\[0-9\](?:\n|\[ \t\]+#)" 2 } } */ +/* { dg-final { scan-assembler-times "vpmovzxwd\[ \t\]+\\\(\[^\n\]*%xmm\[0-9\](?:\n|\[ \t\]+#)" 2 } } */ +/* { dg-final { scan-assembler-times "vpmovzxdq\[ \t\]+\\\(\[^\n\]*%xmm\[0-9\](?:\n|\[ \t\]+#)" 2 } } */ + +void +foo (unsigned char* p1, unsigned char* p2, short* __restrict p3) +{ + for (int i = 0 ; i != 8; i++) + p3[i] = p1[i] + p2[i]; + return; +} + +void +foo1 (unsigned short* p1, unsigned short* p2, int* __restrict p3) +{ + for (int i = 0 ; i != 4; i++) + p3[i] = p1[i] + p2[i]; + return; +} + +void +foo2 (unsigned int* p1, unsigned int* p2, long long* __restrict p3) +{ + for (int i = 0 ; i != 2; i++) + p3[i] = (long long)p1[i] + (long long)p2[i]; + return; +} -- 2.18.1
From c00369aa36d2e169b59287c58872c915953dd2a2 Mon Sep 17 00:00:00 2001 From: liuhongt <hongtao....@intel.com> Date: Tue, 13 Oct 2020 15:35:29 +0800 Subject: [PATCH] Simplify vec_select of paradoxical subreg. For rtx like (vec_select:V2SI (subreg:V4SI (inner:V2SI) 0) (parallel [(const_int 0) (const_int 1)])) it could be simplified as inner. gcc/ChangeLog PR rtl-optimization/97249 * simplify-rtx.c (simplify_binary_operation_1): Simplify vec_select of paradoxical subreg. gcc/testsuite/ChangeLog * gcc.target/i386/pr97249-1.c: New test. --- gcc/simplify-rtx.c | 27 ++++++++++++++++++++ gcc/testsuite/gcc.target/i386/pr97249-1.c | 30 +++++++++++++++++++++++ 2 files changed, 57 insertions(+) create mode 100644 gcc/testsuite/gcc.target/i386/pr97249-1.c diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c index 869f0d11b2e..9c397157f28 100644 --- a/gcc/simplify-rtx.c +++ b/gcc/simplify-rtx.c @@ -4170,6 +4170,33 @@ simplify_binary_operation_1 (enum rtx_code code, machine_mode mode, return subop1; } } + + /* For cases like + (vec_select:V2SI (subreg:V4SI (inner:V2SI) 0) + (parallel [(const_int 0) (const_int 1)])). + return inner directly. */ + if (GET_CODE (trueop0) == SUBREG + && paradoxical_subreg_p (trueop0) + && mode == GET_MODE (XEXP (trueop0, 0)) + && (GET_MODE_NUNITS (GET_MODE (trueop0))).is_constant (&l0) + && (GET_MODE_NUNITS (mode)).is_constant (&l1) + && l0 % l1 == 0) + { + gcc_assert (known_eq (XVECLEN (trueop1, 0), l1)); + unsigned HOST_WIDE_INT expect = (HOST_WIDE_INT_1U << l1) - 1; + unsigned HOST_WIDE_INT sel = 0; + int i = 0; + for (;i != l1; i++) + { + rtx j = XVECEXP (trueop1, 0, i); + if (!CONST_INT_P (j)) + break; + sel |= HOST_WIDE_INT_1U << UINTVAL (j); + } + /* ??? Need to simplify XEXP (trueop0, 0) here. 
*/ + if (sel == expect) + return XEXP (trueop0, 0); + } } if (XVECLEN (trueop1, 0) == 1 diff --git a/gcc/testsuite/gcc.target/i386/pr97249-1.c b/gcc/testsuite/gcc.target/i386/pr97249-1.c new file mode 100644 index 00000000000..bc34aa8baa6 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr97249-1.c @@ -0,0 +1,30 @@ +/* PR target/97249 */ +/* { dg-do compile } */ +/* { dg-options "-mavx2 -O3 -masm=att" } */ +/* { dg-final { scan-assembler-times "vpmovzxbw\[ \t\]+\\\(\[^\n\]*%xmm\[0-9\](?:\n|\[ \t\]+#)" 2 } } */ +/* { dg-final { scan-assembler-times "vpmovzxwd\[ \t\]+\\\(\[^\n\]*%xmm\[0-9\](?:\n|\[ \t\]+#)" 2 } } */ +/* { dg-final { scan-assembler-times "vpmovzxdq\[ \t\]+\\\(\[^\n\]*%xmm\[0-9\](?:\n|\[ \t\]+#)" 2 } } */ + +void +foo (unsigned char* p1, unsigned char* p2, short* __restrict p3) +{ + for (int i = 0 ; i != 8; i++) + p3[i] = p1[i] + p2[i]; + return; +} + +void +foo1 (unsigned short* p1, unsigned short* p2, int* __restrict p3) +{ + for (int i = 0 ; i != 4; i++) + p3[i] = p1[i] + p2[i]; + return; +} + +void +foo2 (unsigned int* p1, unsigned int* p2, long long* __restrict p3) +{ + for (int i = 0 ; i != 2; i++) + p3[i] = (long long)p1[i] + (long long)p2[i]; + return; +} -- 2.18.1