Hi,

Sending a new version of the patch because I noticed the pattern was overriding 
the nor pattern.

A second pattern is needed to capture the nor case as combine will match the
longest sequence first.  So without this pattern we end up de-optimizing nor
and instead emit two nots.  I did not find a better way to do this.

Note: This patch series is working incrementally towards generating the most
      efficient code for this and other loops in small steps.

Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

        * config/aarch64/aarch64-sve.md (*fcm<cmp_op><mode>_bic_combine,
        *fcm<cmp_op><mode>_nor_combine, *fcmuo<mode>_bic_combine,
        *fcmuo<mode>_nor_combine): New.

gcc/testsuite/ChangeLog:

        * gcc.target/aarch64/sve/pred-not-gen.c-1: New test.
        * gcc.target/aarch64/sve/pred-not-gen.c-2: New test.
        * gcc.target/aarch64/sve/pred-not-gen.c-3: New test.
        * gcc.target/aarch64/sve/pred-not-gen.c-4: New test.

--- inline copy of patch ---

diff --git a/gcc/config/aarch64/aarch64-sve.md 
b/gcc/config/aarch64/aarch64-sve.md
index 
359fe0e457096cf4042a774789a5c241420703d3..8fe4c721313e70592d2cf0acbfbe2f07b070b51a
 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -8126,6 +8126,160 @@ (define_insn_and_split "*fcmuo<mode>_and_combine"
          UNSPEC_COND_FCMUO))]
 )
 
+;; Similar to *fcm<cmp_op><mode>_and_combine, but for BIC rather than AND.
+;; In this case, we still need a separate NOT/BIC operation, but predicating
+;; the comparison on the BIC operand removes the need for a PTRUE.
+(define_insn_and_split "*fcm<cmp_op><mode>_bic_combine"
+  [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
+       (and:<VPRED>
+         (and:<VPRED>
+           (not:<VPRED>
+             (unspec:<VPRED>
+               [(match_operand:<VPRED> 1)
+                (const_int SVE_KNOWN_PTRUE)
+                (match_operand:SVE_FULL_F 2 "register_operand" "w")
+                (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "wDz")]
+               SVE_COND_FP_CMP_I0))
+           (match_operand:<VPRED> 4 "register_operand" "Upa"))
+         (match_dup:<VPRED> 1)))
+   (clobber (match_scratch:<VPRED> 5 "=&Upl"))]
+  "TARGET_SVE"
+  "#"
+  "&& 1"
+  [(set (match_dup 5)
+       (unspec:<VPRED>
+         [(match_dup 4)
+          (const_int SVE_MAYBE_NOT_PTRUE)
+          (match_dup 2)
+          (match_dup 3)]
+         SVE_COND_FP_CMP_I0))
+   (set (match_dup 0)
+       (and:<VPRED>
+         (not:<VPRED>
+           (match_dup 5))
+         (match_dup 4)))]
+{
+  if (can_create_pseudo_p ())
+    operands[5] = gen_reg_rtx (<VPRED>mode);
+}
+)
+
+;; Make sure that we expand to a nor when the operand 4 of
+;; *fcm<cmp_op><mode>_bic_combine is a not.
+(define_insn_and_split "*fcm<cmp_op><mode>_nor_combine"
+  [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
+       (and:<VPRED>
+         (and:<VPRED>
+           (not:<VPRED>
+             (unspec:<VPRED>
+               [(match_operand:<VPRED> 1)
+                (const_int SVE_KNOWN_PTRUE)
+                (match_operand:SVE_FULL_F 2 "register_operand" "w")
+                (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "wDz")]
+               SVE_COND_FP_CMP_I0))
+           (not:<VPRED>
+             (match_operand:<VPRED> 4 "register_operand" "Upa")))
+         (match_dup:<VPRED> 1)))
+   (clobber (match_scratch:<VPRED> 5 "=&Upl"))]
+  "TARGET_SVE"
+  "#"
+  "&& 1"
+  [(set (match_dup 5)
+       (unspec:<VPRED>
+         [(match_dup 1)
+          (const_int SVE_KNOWN_PTRUE)
+          (match_dup 2)
+          (match_dup 3)]
+         SVE_COND_FP_CMP_I0))
+   (set (match_dup 0)
+       (and:<VPRED>
+         (and:<VPRED>
+           (not:<VPRED>
+             (match_dup 5))
+           (not:<VPRED>
+             (match_dup 4)))
+         (match_dup 1)))]
+{
+  if (can_create_pseudo_p ())
+    operands[5] = gen_reg_rtx (<VPRED>mode);
+}
+)
+
+(define_insn_and_split "*fcmuo<mode>_bic_combine"
+  [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
+       (and:<VPRED>
+         (and:<VPRED>
+           (not:<VPRED>
+             (unspec:<VPRED>
+               [(match_operand:<VPRED> 1)
+                (const_int SVE_KNOWN_PTRUE)
+                (match_operand:SVE_FULL_F 2 "register_operand" "w")
+                (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "wDz")]
+               UNSPEC_COND_FCMUO))
+           (match_operand:<VPRED> 4 "register_operand" "Upa"))
+         (match_dup:<VPRED> 1)))
+   (clobber (match_scratch:<VPRED> 5 "=&Upl"))]
+  "TARGET_SVE"
+  "#"
+  "&& 1"
+  [(set (match_dup 5)
+       (unspec:<VPRED>
+         [(match_dup 4)
+          (const_int SVE_MAYBE_NOT_PTRUE)
+          (match_dup 2)
+          (match_dup 3)]
+         UNSPEC_COND_FCMUO))
+   (set (match_dup 0)
+       (and:<VPRED>
+         (not:<VPRED>
+           (match_dup 5))
+         (match_dup 4)))]
+{
+  if (can_create_pseudo_p ())
+    operands[5] = gen_reg_rtx (<VPRED>mode);
+}
+)
+
+;; Same for unordered comparisons.
+(define_insn_and_split "*fcmuo<mode>_nor_combine"
+  [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
+       (and:<VPRED>
+         (and:<VPRED>
+           (not:<VPRED>
+             (unspec:<VPRED>
+               [(match_operand:<VPRED> 1)
+                (const_int SVE_KNOWN_PTRUE)
+                (match_operand:SVE_FULL_F 2 "register_operand" "w")
+                (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "wDz")]
+               UNSPEC_COND_FCMUO))
+           (not:<VPRED>
+             (match_operand:<VPRED> 4 "register_operand" "Upa")))
+         (match_dup:<VPRED> 1)))
+   (clobber (match_scratch:<VPRED> 5 "=&Upl"))]
+  "TARGET_SVE"
+  "#"
+  "&& 1"
+  [(set (match_dup 5)
+       (unspec:<VPRED>
+         [(match_dup 1)
+          (const_int SVE_KNOWN_PTRUE)
+          (match_dup 2)
+          (match_dup 3)]
+         UNSPEC_COND_FCMUO))
+   (set (match_dup 0)
+       (and:<VPRED>
+         (and:<VPRED>
+           (not:<VPRED>
+             (match_dup 5))
+           (not:<VPRED>
+             (match_dup 4)))
+         (match_dup 1)))]
+{
+  if (can_create_pseudo_p ())
+    operands[5] = gen_reg_rtx (<VPRED>mode);
+}
+)
+
 ;; -------------------------------------------------------------------------
 ;; ---- [FP] Absolute comparisons
 ;; -------------------------------------------------------------------------
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-1.c 
b/gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-1.c
new file mode 100644
index 
0000000000000000000000000000000000000000..2c06564186c5a5e7917da475a9c201c81dfeb136
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-1.c
@@ -0,0 +1,23 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O3 --save-temps" } */
+
+/*
+** f10:
+** ...
+**     ld1d    z1.d, p0/z, \[x1, x5, lsl 3\]
+**     fcmgt   p2.d, p0/z, z1.d, #0.0
+**     ld1d    z2.d, p2/z, \[x2, x5, lsl 3\]
+**     not     p1.b, p0/z, p2.b
+**     ld1d    z0.d, p1/z, \[x3, x5, lsl 3\]
+** ...
+*/
+
+void f10(double * restrict z, double * restrict w, double * restrict x, double 
* restrict y, int n)
+{
+    for (int i = 0; i < n; i++) {
+        z[i] = (w[i] > 0) ? x[i] + w[i] : y[i] - w[i];
+    }
+}
+
+/* { dg-final { scan-assembler-not {\tbic\t} } } */
+/* { dg-final { scan-assembler-times {\tnot\tp[0-9]+\.b, p[0-9]+/z, 
p[0-9]+\.b\n} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-2.c 
b/gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-2.c
new file mode 100644
index 
0000000000000000000000000000000000000000..0c3b78d4c67455c971e94fb2ffdd7be2d4884864
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-2.c
@@ -0,0 +1,23 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O3 --save-temps" } */
+
+/*
+** f11:
+** ...
+**     ld1d    z0.d, p0/z, \[x1, x2, lsl 3\]
+**     fcmgt   p2.d, p3/z, z0.d, #0.0
+**     fcmgt   p1.d, p0/z, z0.d, #0.0
+**     not     p1.b, p0/z, p1.b
+**     ld1d    z1.d, p1/z, \[x3, x2, lsl 3\]
+** ...
+*/
+
+void f11(double * restrict z, double * restrict w, double * restrict x, double 
* restrict y, int n)
+{
+    for (int i = 0; i < n; i++) {
+        z[i] = (w[i] > 0) ? w[i] : y[i];
+    }
+}
+
+/* { dg-final { scan-assembler-not {\tbic\t} } } */
+/* { dg-final { scan-assembler-times {\tnot\tp[0-9]+\.b, p[0-9]+/z, 
p[0-9]+\.b\n} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-3.c 
b/gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-3.c
new file mode 100644
index 
0000000000000000000000000000000000000000..248f8ab57191ce8a1d4c334533de8bc76aa07691
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-3.c
@@ -0,0 +1,21 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O3 --save-temps" } */
+
+/*
+** f12:
+** ...
+**     ld1w    z1.s, p0/z, \[x1, x2, lsl 2\]
+**     cmple   p1.s, p0/z, z1.s, #0
+**     ld1w    z0.s, p1/z, \[x3, x2, lsl 2\]
+** ...
+*/
+
+void f12(int * restrict z, int * restrict w, int * restrict x, int * restrict 
y, int n)
+{
+    for (int i = 0; i < n; i++) {
+        z[i] = (w[i] > 0) ? w[i] : y[i];
+    }
+}
+
+/* { dg-final { scan-assembler-not {\tbic\t} } } */
+/* { dg-final { scan-assembler-not {\tnot\tp[0-9]+\.b, p[0-9]+/z, 
p[0-9]+\.b\n} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-4.c 
b/gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-4.c
new file mode 100644
index 
0000000000000000000000000000000000000000..96200309880a91ad1db5801115c911cfdce06125
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-4.c
@@ -0,0 +1,14 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O3 --save-temps" } */
+
+#include <math.h>
+
+void f13(double * restrict z, double * restrict w, double * restrict x, double 
* restrict y, int n)
+{
+    for (int i = 0; i < n; i++) {
+        z[i] = (isunordered(w[i], 0)) ? x[i] + w[i] : y[i] - w[i];
+    }
+}
+
+/* { dg-final { scan-assembler-not {\tbic\t} } } */
+/* { dg-final { scan-assembler-times {\tnot\tp[0-9]+\.b, p[0-9]+/z, 
p[0-9]+\.b\n} 1 } } */

Attachment: rb14776 (1).patch
Description: rb14776 (1).patch

Reply via email to