Hi:
  For an rtx like
  (vec_select:V2SI (subreg:V4SI (inner:V2SI) 0)
                   (parallel [(const_int 0) (const_int 1)]))
  the whole expression can be simplified to just inner.

  Bootstrap is OK, and the regression tests on the i386 backend pass.

gcc/ChangeLog
        PR rtl-optimization/97249
        * simplify-rtx.c (simplify_binary_operation_1): Simplify
        vec_select of paradoxical subreg.

gcc/testsuite/ChangeLog

        * gcc.target/i386/pr97249-1.c: New test.

-- 
BR,
Hongtao
From c00369aa36d2e169b59287c58872c915953dd2a2 Mon Sep 17 00:00:00 2001
From: liuhongt <hongtao....@intel.com>
Date: Tue, 13 Oct 2020 15:35:29 +0800
Subject: [PATCH] Simplify vec_select of paradoxical subreg.

For rtx like
  (vec_select:V2SI (subreg:V4SI (inner:V2SI) 0)
		   (parallel [(const_int 0) (const_int 1)]))
the whole expression can be simplified to just inner.

gcc/ChangeLog
	PR rtl-optimization/97249
	* simplify-rtx.c (simplify_binary_operation_1): Simplify
	vec_select of paradoxical subreg.

gcc/testsuite/ChangeLog

	* gcc.target/i386/pr97249-1.c: New test.
---
 gcc/simplify-rtx.c                        | 27 ++++++++++++++++++++
 gcc/testsuite/gcc.target/i386/pr97249-1.c | 30 +++++++++++++++++++++++
 2 files changed, 57 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr97249-1.c

diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c
index 869f0d11b2e..9c397157f28 100644
--- a/gcc/simplify-rtx.c
+++ b/gcc/simplify-rtx.c
@@ -4170,6 +4170,33 @@ simplify_binary_operation_1 (enum rtx_code code, machine_mode mode,
 		    return subop1;
 		}
 	    }
+
+	  /* For cases like
+	     (vec_select:V2SI (subreg:V4SI (inner:V2SI) 0)
+			      (parallel [(const_int 0) (const_int 1)])).
+	     return inner directly.  */
+	  if (GET_CODE (trueop0) == SUBREG
+	      && paradoxical_subreg_p (trueop0)
+	      && mode == GET_MODE (XEXP (trueop0, 0))
+	      && (GET_MODE_NUNITS (GET_MODE (trueop0))).is_constant (&l0)
+	      && (GET_MODE_NUNITS (mode)).is_constant (&l1)
+	      && l0 % l1 == 0)
+	    {
+	      gcc_assert (known_eq (XVECLEN (trueop1, 0), l1));
+	      unsigned HOST_WIDE_INT expect = (HOST_WIDE_INT_1U << l1) - 1;
+	      unsigned HOST_WIDE_INT sel = 0;
+	      int i = 0;
+	      for (;i != l1; i++)
+		{
+		  rtx j = XVECEXP (trueop1, 0, i);
+		  if (!CONST_INT_P (j))
+		    break;
+		  sel |= HOST_WIDE_INT_1U << UINTVAL (j);
+		}
+	      /* ??? Need to simplify XEXP (trueop0, 0) here.  */
+	      if (sel == expect)
+		return XEXP (trueop0, 0);
+	    }
 	}
 
       if (XVECLEN (trueop1, 0) == 1
diff --git a/gcc/testsuite/gcc.target/i386/pr97249-1.c b/gcc/testsuite/gcc.target/i386/pr97249-1.c
new file mode 100644
index 00000000000..bc34aa8baa6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr97249-1.c
@@ -0,0 +1,30 @@
+/* PR target/97249  */
+/* { dg-do compile } */
+/* { dg-options "-mavx2 -O3 -masm=att" } */
+/* { dg-final { scan-assembler-times "vpmovzxbw\[ \t\]+\\\(\[^\n\]*%xmm\[0-9\](?:\n|\[ \t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vpmovzxwd\[ \t\]+\\\(\[^\n\]*%xmm\[0-9\](?:\n|\[ \t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vpmovzxdq\[ \t\]+\\\(\[^\n\]*%xmm\[0-9\](?:\n|\[ \t\]+#)" 2 } } */
+
+void
+foo (unsigned char* p1, unsigned char* p2, short* __restrict p3)
+{
+    for (int i = 0 ; i != 8; i++)
+     p3[i] = p1[i] + p2[i];
+     return;
+}
+
+void
+foo1 (unsigned short* p1, unsigned short* p2, int* __restrict p3)
+{
+    for (int i = 0 ; i != 4; i++)
+     p3[i] = p1[i] + p2[i];
+     return;
+}
+
+void
+foo2 (unsigned int* p1, unsigned int* p2, long long* __restrict p3)
+{
+    for (int i = 0 ; i != 2; i++)
+      p3[i] = (long long)p1[i] + (long long)p2[i];
+     return;
+}
-- 
2.18.1

From c00369aa36d2e169b59287c58872c915953dd2a2 Mon Sep 17 00:00:00 2001
From: liuhongt <hongtao....@intel.com>
Date: Tue, 13 Oct 2020 15:35:29 +0800
Subject: [PATCH] Simplify vec_select of paradoxical subreg.

For rtx like
  (vec_select:V2SI (subreg:V4SI (inner:V2SI) 0)
		   (parallel [(const_int 0) (const_int 1)]))
the whole expression can be simplified to just inner.

gcc/ChangeLog
	PR rtl-optimization/97249
	* simplify-rtx.c (simplify_binary_operation_1): Simplify
	vec_select of paradoxical subreg.

gcc/testsuite/ChangeLog

	* gcc.target/i386/pr97249-1.c: New test.
---
 gcc/simplify-rtx.c                        | 27 ++++++++++++++++++++
 gcc/testsuite/gcc.target/i386/pr97249-1.c | 30 +++++++++++++++++++++++
 2 files changed, 57 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr97249-1.c

diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c
index 869f0d11b2e..9c397157f28 100644
--- a/gcc/simplify-rtx.c
+++ b/gcc/simplify-rtx.c
@@ -4170,6 +4170,33 @@ simplify_binary_operation_1 (enum rtx_code code, machine_mode mode,
 		    return subop1;
 		}
 	    }
+
+	  /* For cases like
+	     (vec_select:V2SI (subreg:V4SI (inner:V2SI) 0)
+			      (parallel [(const_int 0) (const_int 1)])).
+	     return inner directly.  */
+	  if (GET_CODE (trueop0) == SUBREG
+	      && paradoxical_subreg_p (trueop0)
+	      && mode == GET_MODE (XEXP (trueop0, 0))
+	      && (GET_MODE_NUNITS (GET_MODE (trueop0))).is_constant (&l0)
+	      && (GET_MODE_NUNITS (mode)).is_constant (&l1)
+	      && l0 % l1 == 0)
+	    {
+	      gcc_assert (known_eq (XVECLEN (trueop1, 0), l1));
+	      unsigned HOST_WIDE_INT expect = (HOST_WIDE_INT_1U << l1) - 1;
+	      unsigned HOST_WIDE_INT sel = 0;
+	      int i = 0;
+	      for (;i != l1; i++)
+		{
+		  rtx j = XVECEXP (trueop1, 0, i);
+		  if (!CONST_INT_P (j))
+		    break;
+		  sel |= HOST_WIDE_INT_1U << UINTVAL (j);
+		}
+	      /* ??? Need to simplify XEXP (trueop0, 0) here.  */
+	      if (sel == expect)
+		return XEXP (trueop0, 0);
+	    }
 	}
 
       if (XVECLEN (trueop1, 0) == 1
diff --git a/gcc/testsuite/gcc.target/i386/pr97249-1.c b/gcc/testsuite/gcc.target/i386/pr97249-1.c
new file mode 100644
index 00000000000..bc34aa8baa6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr97249-1.c
@@ -0,0 +1,30 @@
+/* PR target/97249  */
+/* { dg-do compile } */
+/* { dg-options "-mavx2 -O3 -masm=att" } */
+/* { dg-final { scan-assembler-times "vpmovzxbw\[ \t\]+\\\(\[^\n\]*%xmm\[0-9\](?:\n|\[ \t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vpmovzxwd\[ \t\]+\\\(\[^\n\]*%xmm\[0-9\](?:\n|\[ \t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vpmovzxdq\[ \t\]+\\\(\[^\n\]*%xmm\[0-9\](?:\n|\[ \t\]+#)" 2 } } */
+
+void
+foo (unsigned char* p1, unsigned char* p2, short* __restrict p3)
+{
+    for (int i = 0 ; i != 8; i++)
+     p3[i] = p1[i] + p2[i];
+     return;
+}
+
+void
+foo1 (unsigned short* p1, unsigned short* p2, int* __restrict p3)
+{
+    for (int i = 0 ; i != 4; i++)
+     p3[i] = p1[i] + p2[i];
+     return;
+}
+
+void
+foo2 (unsigned int* p1, unsigned int* p2, long long* __restrict p3)
+{
+    for (int i = 0 ; i != 2; i++)
+      p3[i] = (long long)p1[i] + (long long)p2[i];
+     return;
+}
-- 
2.18.1

Reply via email to