From: Pan Li <[email protected]>

The vwsll.vi of the zvbb ext takes a zero extend before the ashift.  But
we can still do the combine based on sign extend if and only if the
shift is an imm and the sign extend bits are all shifted out.
For example as below

  vsetvli   zero, zero, e32, m1, ta, ma
  vsext.vf2 v1, v2
  vsll.vi   v1, v1, 16

If the ashift amount is greater than or equal to the truncated bitsize
(aka 16 for e32), the sign or zero extend bits will all be shifted out
and never pollute the final result.  Then we have

  vsetvli   zero, zero, e32, m1, ta, ma
  vwsll.vi  v1, v2, 16

        PR target/121959

The below test suites passed for this patch series.
 * The rv64gcv full regression test.

gcc/ChangeLog:

        * config/riscv/autovec-opt.md (*vwsll_sign_extend_<mode>): Add
        pattern to combine vsext.vf2 and vsll.vi to vwsll.vi.

gcc/testsuite/ChangeLog:

        * gcc.target/riscv/rvv/autovec/pr121959-1.c: New test.
        * gcc.target/riscv/rvv/autovec/pr121959-2.c: New test.
        * gcc.target/riscv/rvv/autovec/pr121959-3.c: New test.
        * gcc.target/riscv/rvv/autovec/pr121959-4.c: New test.
        * gcc.target/riscv/rvv/autovec/pr121959-5.c: New test.
        * gcc.target/riscv/rvv/autovec/pr121959-run-1.c: New test.
        * gcc.target/riscv/rvv/autovec/pr121959.h: New test.

Signed-off-by: Pan Li <[email protected]>
---
 gcc/config/riscv/autovec-opt.md               | 41 ++++++++++++
 .../gcc.target/riscv/rvv/autovec/pr121959-1.c |  9 +++
 .../gcc.target/riscv/rvv/autovec/pr121959-2.c |  9 +++
 .../gcc.target/riscv/rvv/autovec/pr121959-3.c |  9 +++
 .../gcc.target/riscv/rvv/autovec/pr121959-4.c |  9 +++
 .../gcc.target/riscv/rvv/autovec/pr121959-5.c |  9 +++
 .../riscv/rvv/autovec/pr121959-run-1.c        | 65 +++++++++++++++++++
 .../gcc.target/riscv/rvv/autovec/pr121959.h   | 24 +++++++
 8 files changed, 175 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-3.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-4.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-5.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-run-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959.h

diff --git a/gcc/config/riscv/autovec-opt.md b/gcc/config/riscv/autovec-opt.md
index 3d6e0a19c8a..12e481810ae 100644
--- a/gcc/config/riscv/autovec-opt.md
+++ b/gcc/config/riscv/autovec-opt.md
@@ -2399,3 +2399,44 @@ (define_insn_and_split "*vfrsub_vf_<mode>"
   }
   [(set_attr "type" "vfalu")]
 )
+
+;; Combine vsext.vf2 + vsll.vi into vwsll.vi, which requires ZVBB.
+;; vwsll.vi zero-extends its source, so the combine is only valid when
+;; the shift amount is >= the narrow element width and thus shifts out
+;; every extension bit.  Appears in the satd function of x264.
+(define_insn_and_split "*vwsll_sign_extend_<mode>"
+  [(set (match_operand:VWEXTI               0 "register_operand")
+       (ashift:VWEXTI
+         (sign_extend:VWEXTI
+           (match_operand:<V_DOUBLE_TRUNC> 1 "register_operand"))
+         (match_operand                    2 "const_int_operand")))]
+  "TARGET_VECTOR && TARGET_ZVBB && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+  {
+    int imm = INTVAL (operands[2]);
+    int trunc_prec = GET_MODE_PRECISION (GET_MODE_INNER (<V_DOUBLE_TRUNC>mode));
+
+    if (imm >= trunc_prec)
+      {
+       insn_code icode = code_for_pred_vwsll_scalar (<MODE>mode);
+       emit_vlmax_insn (icode, riscv_vector::BINARY_OP, operands);
+      }
+    else
+      {
+       insn_code icode = code_for_pred_vf2 (SIGN_EXTEND, <MODE>mode);
+       rtx extend = gen_reg_rtx (<MODE>mode);
+       rtx unary_ops[] = {extend, operands[1]};
+       riscv_vector::emit_vlmax_insn (icode, riscv_vector::UNARY_OP,
+                                      unary_ops);
+
+       icode = code_for_pred_scalar (ASHIFT, <MODE>mode);
+       rtx binary_ops[] = {operands[0], extend, operands[2]};
+       riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP,
+                                      binary_ops);
+      }
+
+    DONE;
+  }
+)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-1.c
new file mode 100644
index 00000000000..a42d7c4de60
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-1.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvbb -mabi=lp64d -O3" } */
+
+#include "pr121959.h"
+
+DEF_VWSLL_FUNC_0(int32_t, uint8_t, 16)
+
+/* { dg-final { scan-assembler-times {vwsll.vi} 1 } } */
+/* { dg-final { scan-assembler-not {vsll.vi} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-2.c
new file mode 100644
index 00000000000..2a3ef8d2617
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-2.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+#include "pr121959.h"
+
+DEF_VWSLL_FUNC_0(int32_t, uint8_t, 16)
+
+/* { dg-final { scan-assembler-times {vsll.vi} 1 } } */
+/* { dg-final { scan-assembler-not {vwsll.vi} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-3.c
new file mode 100644
index 00000000000..59a930a1efa
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-3.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvbb -mabi=lp64d -O3" } */
+
+#include "pr121959.h"
+
+DEF_VWSLL_FUNC_0(int32_t, uint8_t, 17)
+
+/* { dg-final { scan-assembler-times {vwsll.vi} 1 } } */
+/* { dg-final { scan-assembler-not {vsll.vi} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-4.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-4.c
new file mode 100644
index 00000000000..59a6d365af4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-4.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+#include "pr121959.h"
+
+DEF_VWSLL_FUNC_0(int32_t, uint8_t, 17)
+
+/* { dg-final { scan-assembler-times {vsll.vi} 1 } } */
+/* { dg-final { scan-assembler-not {vwsll.vi} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-5.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-5.c
new file mode 100644
index 00000000000..a9319a3a959
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-5.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvbb -mabi=lp64d -O3" } */
+
+#include "pr121959.h"
+
+DEF_VWSLL_FUNC_0(int32_t, uint8_t, 15)
+
+/* { dg-final { scan-assembler-times {vsll.vi} 1 } } */
+/* { dg-final { scan-assembler-not {vwsll.vi} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-run-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-run-1.c
new file mode 100644
index 00000000000..77fd95b8ebb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-run-1.c
@@ -0,0 +1,65 @@
+/* { dg-do run } */
+/* { dg-require-effective-target "riscv_zvbb_ok" } */
+/* { dg-add-options "riscv_v" } */
+/* { dg-add-options "riscv_zvbb" } */
+/* { dg-additional-options "-std=c99" } */
+
+#include "pr121959.h"
+
+#define WT int32_t
+#define NT uint8_t
+#define IMM 16
+#define N 16
+
+DEF_VWSLL_FUNC_0_WRAP(WT, NT, IMM)
+
+NT g_data[][2][N] = {
+  {
+    /* a: minuends */
+    {
+        2,   2,   2,   2,
+      255, 255, 255, 255,
+      128, 128, 128, 128,
+      127, 127, 127, 127,
+    },
+    /* b: subtrahends */
+    {
+      1, 1, 1, 1,
+      0, 0, 0, 0,
+      2, 2, 2, 2,
+      7, 7, 7, 7,
+    },
+  },
+};
+
+WT g_expect[][N] = {
+  /* 0 */
+  {
+       65536,    65536,    65536,    65536,
+    16711680, 16711680, 16711680, 16711680,
+     8257536,  8257536,  8257536,  8257536,
+     7864320,  7864320,  7864320,  7864320,
+  },
+};
+
+int
+main ()
+{
+  unsigned i, k;
+  WT out[N];
+
+  for (i = 0; i < sizeof (g_data) / sizeof (g_data[0]); i++)
+    {
+      NT *a = g_data[i][0];
+      NT *b = g_data[i][1];
+      WT *expect = g_expect[i];
+
+      RUN_VWSLL_FUNC_0_WRAP (WT, NT, IMM, out, a, b, N);
+
+      for (k = 0; k < N; k++)
+       if (out[k] != expect[k])
+         __builtin_abort ();
+    }
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959.h b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959.h
new file mode 100644
index 00000000000..10b1b623979
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959.h
@@ -0,0 +1,24 @@
+#ifndef HAVE_DEFINED_PR121959_H
+#define HAVE_DEFINED_PR121959_H
+
+#include <stdint.h>
+
+#define DEF_VWSLL_FUNC_0(WT, NT, IMM)                   \
+void                                                    \
+test_from_##NT##_to_##WT##_##IMM##_0(WT * restrict res, \
+                                    NT * restrict a,   \
+                                    NT * restrict b,   \
+                                    int n)             \
+{                                                       \
+  for (int i = 0; i < n; i++)                           \
+    {                                                   \
+      res[i] = (a[i] - b[i]) << IMM;                    \
+    }                                                   \
+}
+#define DEF_VWSLL_FUNC_0_WRAP(WT, NT, IMM) DEF_VWSLL_FUNC_0(WT, NT, IMM)
+#define RUN_VWSLL_FUNC_0(WT, NT, IMM, res, a, b, n) \
+  test_from_##NT##_to_##WT##_##IMM##_0(res, a, b, n)
+#define RUN_VWSLL_FUNC_0_WRAP(WT, NT, IMM, res, a, b, n) \
+  RUN_VWSLL_FUNC_0(WT, NT, IMM, res, a, b, n)
+
+#endif
-- 
2.43.0

Reply via email to