Hi:
  Bootstrapped and regtested on x86_64-linux-gnu{-m32,}
  Ok for trunk?

gcc/ChangeLog:

        PR rtl-optimization/101796
        * simplify-rtx.c
        (simplify_context::simplify_binary_operation_1): Simplify
        vector shift/rotate with const_vec_duplicate to vector
        shift/rotate with const_int element.

gcc/testsuite/ChangeLog:

        PR rtl-optimization/101796
        * gcc.target/i386/pr101796.c: New test.
---
 gcc/simplify-rtx.c                       | 15 ++++++
 gcc/testsuite/gcc.target/i386/pr101796.c | 65 ++++++++++++++++++++++++
 2 files changed, 80 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr101796.c

diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c
index a719f57870f..75f3e455562 100644
--- a/gcc/simplify-rtx.c
+++ b/gcc/simplify-rtx.c
@@ -3970,6 +3970,21 @@ simplify_context::simplify_binary_operation_1 (rtx_code code,
            return simplify_gen_binary (code, mode, op0,
                                        gen_int_shift_amount (mode, val));
        }
+
+      /* Optimize vector shift/rotate with const_vec_duplicate
+        to vector shift/rotate with const_int element.  */
+      /* TODO: vec_duplicate with variable can also be simplified,
+        but GCC only requires operand 2 of shift/rotate to be a scalar type
+        which can have different modes in different backends, which makes
+        it difficult to decide which mode should be chosen
+        for the shift/rotate count.  */
+      if ((code == ASHIFTRT || code == LSHIFTRT
+          || code == ASHIFT || code == ROTATERT
+          || code == ROTATE)
+         && const_vec_duplicate_p (op1))
+       return simplify_gen_binary (code, mode, op0,
+                                   unwrap_const_vec_duplicate (op1));
+
       break;
 
     case ASHIFT:
diff --git a/gcc/testsuite/gcc.target/i386/pr101796.c b/gcc/testsuite/gcc.target/i386/pr101796.c
new file mode 100644
index 00000000000..c22d6267fe5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr101796.c
@@ -0,0 +1,65 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -O2 " } */
+/* { dg-final { scan-assembler-not "vpbroadcast" } }  */
+/* { dg-final { scan-assembler-not "vpsrlv\[dwq\]" } }  */
+/* { dg-final { scan-assembler-not "vpsllv\[dwq\]" } }  */
+/* { dg-final { scan-assembler-not "vpsrav\[dwq\]" } }  */
+/* { dg-final { scan-assembler-times "vpsrl\[dwq\]" 3 } }  */
+/* { dg-final { scan-assembler-times "vpsll\[dwq\]" 3 } }  */
+/* { dg-final { scan-assembler-times "vpsra\[dwq\]" 3 } }  */
+
+#include <immintrin.h>
+
+__m512i
+foo (__m512i a)
+{
+  return _mm512_srlv_epi16 (a, _mm512_set1_epi16 (3));
+}
+
+__m512i
+foo1 (__m512i a)
+{
+  return _mm512_srlv_epi32 (a, _mm512_set1_epi32 (3));
+}
+
+__m512i
+foo2 (__m512i a, long long b)
+{
+  return _mm512_srlv_epi64 (a, _mm512_set1_epi64 (3));
+}
+
+__m512i
+foo3 (__m512i a)
+{
+  return _mm512_srav_epi16 (a, _mm512_set1_epi16 (3));
+}
+
+__m512i
+foo4 (__m512i a)
+{
+  return _mm512_srav_epi32 (a, _mm512_set1_epi32 (3));
+}
+
+__m512i
+foo5 (__m512i a, long long b)
+{
+  return _mm512_srav_epi64 (a, _mm512_set1_epi64 (3));
+}
+
+__m512i
+foo6 (__m512i a)
+{
+  return _mm512_sllv_epi16 (a, _mm512_set1_epi16 (3));
+}
+
+__m512i
+foo7 (__m512i a)
+{
+  return _mm512_sllv_epi32 (a, _mm512_set1_epi32 (3));
+}
+
+__m512i
+foo8 (__m512i a, long long b)
+{
+  return _mm512_sllv_epi64 (a, _mm512_set1_epi64 (3));
+}
-- 
2.27.0

Reply via email to