Hi All,
Advanced SIMD lacks a vector compare-not-equal (!=) instruction, and unlike a
compare against zero we can't rewrite the comparison to a cmtst.
This operation is however fairly common, especially now that we support early
break vectorization.
As such this adds a pattern to recognize the negated any comparison and
transform it to an all. i.e. any(~x) => all(x) and invert the branches.
For e.g.
void f1 (int x)
{
for (int i = 0; i < N; i++)
{
b[i] += a[i];
if (a[i] != x)
break;
}
}
We currently generate:
cmeq v31.4s, v30.4s, v29.4s
not v31.16b, v31.16b
umaxp v31.4s, v31.4s, v31.4s
fmov x5, d31
cbnz x5, .L2
and after this patch:
cmeq v31.4s, v30.4s, v29.4s
uminp v31.4s, v31.4s, v31.4s
fmov x5, d31
cbz x5, .L2
Bootstrapped and regtested on aarch64-none-linux-gnu with no issues.
Ok for master?
Thanks,
Tamar
gcc/ChangeLog:
* config/aarch64/aarch64-simd.md (*cbranchnev4si): New.
gcc/testsuite/ChangeLog:
* gcc.target/aarch64/vect-early-break-cbranch_2.c: New test.
--- inline copy of patch --
diff --git a/gcc/config/aarch64/aarch64-simd.md
b/gcc/config/aarch64/aarch64-simd.md
index
cd5ec35c3f53028f14828bd70a92924f62524c15..b1a2c617d7d4106ab725d53a5d0b5c2fb61a0c78
100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -3870,6 +3870,37 @@ (define_expand "cbranch<mode>4"
DONE;
})
+;; Advanced SIMD lacks a vector != comparison, but this is a quite common
+;; operation. To not pay the penalty for inverting == we can map our any
+;; comparisons to all i.e. any(~x) => all(x).
+(define_insn_and_split "*cbranchnev4si"
+ [(set (pc)
+ (if_then_else
+ (ne (subreg:DI
+ (unspec:V4SI
+ [(not:V4SI (match_operand:V4SI 0 "register_operand" "w"))
+ (not:V4SI (match_dup 0))]
+ UNSPEC_UMAXV) 0)
+ (const_int 0))
+ (label_ref (match_operand 1 ""))
+ (pc)))
+ (clobber (match_scratch:V4SI 2 "=w"))]
+ "TARGET_SIMD"
+ "#"
+ "&& true"
+ [(set (match_dup 2)
+ (unspec:V4SI [(match_dup 0) (match_dup 0)] UNSPEC_UMINV))
+ (set (pc)
+ (if_then_else
+ (eq (subreg:DI (match_dup 2) 0)
+ (const_int 0))
+ (label_ref (match_dup 1))
+ (pc)))]
+{
+ if (can_create_pseudo_p ())
+ operands[2] = gen_reg_rtx (V4SImode);
+})
+
;; Patterns comparing two vectors to produce a mask.
(define_expand "vec_cmp<mode><mode>"
diff --git a/gcc/testsuite/gcc.target/aarch64/vect-early-break-cbranch_2.c
b/gcc/testsuite/gcc.target/aarch64/vect-early-break-cbranch_2.c
new file mode 100644
index
0000000000000000000000000000000000000000..e81027bb50138be627f4dfdffb1557893a5a7723
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vect-early-break-cbranch_2.c
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
+
+#pragma GCC target "+nosve"
+
+#define N 640
+int a[N] = {0};
+int b[N] = {0};
+
+
+/*
+** f1:
+** ...
+**	cmeq	v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s
+**	uminp	v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s
+**	fmov	x[0-9]+, d[0-9]+
+**	cbz	x[0-9]+, \.L[0-9]+
+** ...
+*/
+void f1 (int x)
+{
+ for (int i = 0; i < N; i++)
+ {
+ b[i] += a[i];
+ if (a[i] != x)
+ break;
+ }
+}
--
diff --git a/gcc/config/aarch64/aarch64-simd.md
b/gcc/config/aarch64/aarch64-simd.md
index
cd5ec35c3f53028f14828bd70a92924f62524c15..b1a2c617d7d4106ab725d53a5d0b5c2fb61a0c78
100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -3870,6 +3870,37 @@ (define_expand "cbranch<mode>4"
DONE;
})
+;; Advanced SIMD lacks a vector != comparison, but this is a quite common
+;; operation. To not pay the penalty for inverting == we can map our any
+;; comparisons to all i.e. any(~x) => all(x).
+(define_insn_and_split "*cbranchnev4si"
+ [(set (pc)
+ (if_then_else
+ (ne (subreg:DI
+ (unspec:V4SI
+ [(not:V4SI (match_operand:V4SI 0 "register_operand" "w"))
+ (not:V4SI (match_dup 0))]
+ UNSPEC_UMAXV) 0)
+ (const_int 0))
+ (label_ref (match_operand 1 ""))
+ (pc)))
+ (clobber (match_scratch:V4SI 2 "=w"))]
+ "TARGET_SIMD"
+ "#"
+ "&& true"
+ [(set (match_dup 2)
+ (unspec:V4SI [(match_dup 0) (match_dup 0)] UNSPEC_UMINV))
+ (set (pc)
+ (if_then_else
+ (eq (subreg:DI (match_dup 2) 0)
+ (const_int 0))
+ (label_ref (match_dup 1))
+ (pc)))]
+{
+ if (can_create_pseudo_p ())
+ operands[2] = gen_reg_rtx (V4SImode);
+})
+
;; Patterns comparing two vectors to produce a mask.
(define_expand "vec_cmp<mode><mode>"
diff --git a/gcc/testsuite/gcc.target/aarch64/vect-early-break-cbranch_2.c
b/gcc/testsuite/gcc.target/aarch64/vect-early-break-cbranch_2.c
new file mode 100644
index
0000000000000000000000000000000000000000..e81027bb50138be627f4dfdffb1557893a5a7723
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vect-early-break-cbranch_2.c
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
+
+#pragma GCC target "+nosve"
+
+#define N 640
+int a[N] = {0};
+int b[N] = {0};
+
+
+/*
+** f1:
+** ...
+**	cmeq	v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s
+**	uminp	v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s
+**	fmov	x[0-9]+, d[0-9]+
+**	cbz	x[0-9]+, \.L[0-9]+
+** ...
+*/
+void f1 (int x)
+{
+ for (int i = 0; i < N; i++)
+ {
+ b[i] += a[i];
+ if (a[i] != x)
+ break;
+ }
+}