In the example

void f1 ()
{
  for (int i = 0; i < N; i++)
    {
      b[i] += a[i];
      if (a[i] > 0)
        break;
    }
}

when compiled for SVE we generate:

        ld1w    z28.s, p7/z, [x4, x0, lsl 2]
        cmpgt   p14.s, p7/z, z28.s, #0
        ptest   p15, p14.b
        b.none  .L3

Where the ptest isn't needed since the branch only cares about the Z and NZ
flags.

GCC today supports eliding this through the pattern *cmp<cmp_op><mode>_ptest
however this pattern only supports the removal when the outermost context is a
CMP where the predicate is inside the condition itself.

This typically only happens for an unpredicated CMP as a ptrue will be generated
during expand.

In the case above, at the GIMPLE level we have

  mask_patt_14.15_57 = vect__2.11_52 > { 0, ... };
  vec_mask_and_58 = loop_mask_48 & mask_patt_14.15_57;
  if (vec_mask_and_58 != { 0, ... })
    goto <bb 5>; [5.50%]
  else
    goto <bb 6>; [94.50%]

where the loop mask is applied to the compare as an AND.

The loop mask is moved into the compare by the pattern *cmp<cmp_op><mode>_and
which moves the mask inside if the current mask is a ptrue since
p && true -> p.

However this happens after combine, and so we can't both move the predicate
inside AND eliminate the ptests.

To fix this, this patch adds a new pattern *cmp<cmp_op><mode>_and_ptest which
combines these two patterns together allowing us to both push the predicate
inside and eliminate the ptest.

After this patch we generate

        ld1w    z28.s, p7/z, [x4, x0, lsl 2]
        cmpgt   p14.s, p7/z, z28.s, #0
        b.none  .L3

Bootstrapped and regtested on aarch64-none-linux-gnu with no issues.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

        PR target/118974
        * config/aarch64/aarch64-sve.md (*cmp<cmp_op><mode>_and_ptest): New.

gcc/testsuite/ChangeLog:

        PR target/118974
        * gcc.target/aarch64/sve/pr119351.c: Update codegen.
        * gcc.target/aarch64/sve/vect-early-break-cbranch.c: Likewise.

---
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index bf7569f932b6d7392b9c4fb7b94efafb6fd184c2..fe7f52ee1ed400b4eda28e3f90edc0044a5aa7a9 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -8319,6 +8319,40 @@ (define_insn_and_rewrite "*cmp<cmp_op><mode>_ptest"
   }
 )
 
+;; Predicated integer comparisons, formed by combining a PTRUE-predicated
+;; comparison with an AND in which only the flags result is interesting.
+(define_insn_and_rewrite "*cmp<cmp_op><mode>_and_ptest"
+  [(set (reg:CC_NZC CC_REGNUM)
+       (unspec:CC_NZC
+         [(match_operand:VNx16BI 1 "register_operand")
+          (match_operand 4)
+          (const_int SVE_KNOWN_PTRUE)
+          (and:<VPRED>
+            (unspec:<VPRED>
+              [(match_operand 5)
+               (const_int SVE_KNOWN_PTRUE)
+               (SVE_INT_CMP:<VPRED>
+                 (match_operand:SVE_I 2 "register_operand")
+                 (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand"))]
+              UNSPEC_PRED_Z)
+            (match_operand:<VPRED> 6 "register_operand"))]
+         UNSPEC_PTEST))
+   (clobber (match_scratch:<VPRED> 0))]
+  "TARGET_SVE"
+  {@ [ cons: =0, 1    , 2 , 3            ; attrs: pred_clobber ]
+     [ &Upa    ,  Upl, w , <sve_imm_con>; yes                 ] cmp<cmp_op>\t%0.<Vetype>, %6/z, %2.<Vetype>, #%3
+     [ ?Upl    ,  0  , w , <sve_imm_con>; yes                 ] ^
+     [ Upa     ,  Upl, w , <sve_imm_con>; no                  ] ^
+     [ &Upa    ,  Upl, w , w            ; yes                 ] cmp<cmp_op>\t%0.<Vetype>, %6/z, %2.<Vetype>, %3.<Vetype>
+     [ ?Upl    ,  0  , w , w            ; yes                 ] ^
+     [ Upa     ,  Upl, w , w            ; no                  ] ^
+  }
+  "&& !rtx_equal_p (operands[4], operands[5])"
+  {
+    operands[5] = copy_rtx (operands[4]);
+  }
+)
+
 ;; Predicated integer comparisons, formed by combining a PTRUE-predicated
 ;; comparison with an AND.  Split the instruction into its preferred form
 ;; at the earliest opportunity, in order to get rid of the redundant
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr119351.c b/gcc/testsuite/gcc.target/aarch64/sve/pr119351.c
index 85aab355f95f83e1fa65d280f14fb8ade7f7e658..1ebc735a82f4a59d8eccff39346e46a449b4729a 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/pr119351.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pr119351.c
@@ -14,7 +14,6 @@ int x[N] __attribute__((aligned(32)));
 **     ...
 **     ld1w    z[0-9]+.s, p[0-9]+/z, \[x[0-9], x[0-9], lsl 2\]
 **     cmple   p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0
-**     ptest   p[0-9]+, p[0-9]+.b
 **     ...
 */
 
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vect-early-break-cbranch.c b/gcc/testsuite/gcc.target/aarch64/sve/vect-early-break-cbranch.c
index d7cef1105410be04ed67d1d3b800746267f205a8..8bd6fafc4d4248cf0acf7dfa2f07cd005f13de35 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/vect-early-break-cbranch.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/vect-early-break-cbranch.c
@@ -8,7 +8,6 @@ int b[N] = {0};
 ** f1:
 **     ...
 **     cmpgt   p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0
-**     ptest   p[0-9]+, p[0-9]+.b
 **     b.(any|none)    \.L[0-9]+
 **     ...
 */
@@ -25,7 +24,6 @@ void f1 ()
 ** f2:
 **     ...
 **     cmpge   p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0
-**     ptest   p[0-9]+, p[0-9]+.b
 **     b.(any|none)    \.L[0-9]+
 **     ...
 */
@@ -42,7 +40,6 @@ void f2 ()
 ** f3:
 **     ...
 **     cmpeq   p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0
-**     ptest   p[0-9]+, p[0-9]+.b
 **     b.(any|none)    \.L[0-9]+
 **     ...
 */
@@ -59,7 +56,6 @@ void f3 ()
 ** f4:
 **     ...
 **     cmpne   p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0
-**     ptest   p[0-9]+, p[0-9]+.b
 **     b.(any|none)    \.L[0-9]+
 **     ...
 */
@@ -76,7 +72,6 @@ void f4 ()
 ** f5:
 **     ...
 **     cmplt   p[0-9]+.s, p7/z, z[0-9]+.s, #0
-**     ptest   p[0-9]+, p[0-9]+.b
 **     b.(any|none)    .L[0-9]+
 **     ...
 */
@@ -93,7 +88,6 @@ void f5 ()
 ** f6:
 **     ...
 **     cmple   p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0
-**     ptest   p[0-9]+, p[0-9]+.b
 **     b.(any|none)    \.L[0-9]+
 **     ...
 */


-- 
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index bf7569f932b6d7392b9c4fb7b94efafb6fd184c2..fe7f52ee1ed400b4eda28e3f90edc0044a5aa7a9 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -8319,6 +8319,40 @@ (define_insn_and_rewrite "*cmp<cmp_op><mode>_ptest"
   }
 )
 
+;; Predicated integer comparisons, formed by combining a PTRUE-predicated
+;; comparison with an AND in which only the flags result is interesting.
+(define_insn_and_rewrite "*cmp<cmp_op><mode>_and_ptest"
+  [(set (reg:CC_NZC CC_REGNUM)
+	(unspec:CC_NZC
+	  [(match_operand:VNx16BI 1 "register_operand")
+	   (match_operand 4)
+	   (const_int SVE_KNOWN_PTRUE)
+	   (and:<VPRED>
+	     (unspec:<VPRED>
+	       [(match_operand 5)
+	        (const_int SVE_KNOWN_PTRUE)
+	        (SVE_INT_CMP:<VPRED>
+		  (match_operand:SVE_I 2 "register_operand")
+		  (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand"))]
+	       UNSPEC_PRED_Z)
+	     (match_operand:<VPRED> 6 "register_operand"))]
+	  UNSPEC_PTEST))
+   (clobber (match_scratch:<VPRED> 0))]
+  "TARGET_SVE"
+  {@ [ cons: =0, 1    , 2 , 3            ; attrs: pred_clobber ]
+     [ &Upa    ,  Upl, w , <sve_imm_con>; yes                 ] cmp<cmp_op>\t%0.<Vetype>, %6/z, %2.<Vetype>, #%3
+     [ ?Upl    ,  0  , w , <sve_imm_con>; yes                 ] ^
+     [ Upa     ,  Upl, w , <sve_imm_con>; no                  ] ^
+     [ &Upa    ,  Upl, w , w            ; yes                 ] cmp<cmp_op>\t%0.<Vetype>, %6/z, %2.<Vetype>, %3.<Vetype>
+     [ ?Upl    ,  0  , w , w            ; yes                 ] ^
+     [ Upa     ,  Upl, w , w            ; no                  ] ^
+  }
+  "&& !rtx_equal_p (operands[4], operands[5])"
+  {
+    operands[5] = copy_rtx (operands[4]);
+  }
+)
+
 ;; Predicated integer comparisons, formed by combining a PTRUE-predicated
 ;; comparison with an AND.  Split the instruction into its preferred form
 ;; at the earliest opportunity, in order to get rid of the redundant
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr119351.c b/gcc/testsuite/gcc.target/aarch64/sve/pr119351.c
index 85aab355f95f83e1fa65d280f14fb8ade7f7e658..1ebc735a82f4a59d8eccff39346e46a449b4729a 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/pr119351.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pr119351.c
@@ -14,7 +14,6 @@ int x[N] __attribute__((aligned(32)));
 **	...
 **	ld1w	z[0-9]+.s, p[0-9]+/z, \[x[0-9], x[0-9], lsl 2\]
 **	cmple	p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0
-**	ptest	p[0-9]+, p[0-9]+.b
 **	...
 */
 
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vect-early-break-cbranch.c b/gcc/testsuite/gcc.target/aarch64/sve/vect-early-break-cbranch.c
index d7cef1105410be04ed67d1d3b800746267f205a8..8bd6fafc4d4248cf0acf7dfa2f07cd005f13de35 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/vect-early-break-cbranch.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/vect-early-break-cbranch.c
@@ -8,7 +8,6 @@ int b[N] = {0};
 ** f1:
 **	...
 **	cmpgt	p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0
-**	ptest	p[0-9]+, p[0-9]+.b
 **	b.(any|none)	\.L[0-9]+
 **	...
 */
@@ -25,7 +24,6 @@ void f1 ()
 ** f2:
 **	...
 **	cmpge	p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0
-**	ptest	p[0-9]+, p[0-9]+.b
 **	b.(any|none)	\.L[0-9]+
 **	...
 */
@@ -42,7 +40,6 @@ void f2 ()
 ** f3:
 **	...
 **	cmpeq	p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0
-**	ptest	p[0-9]+, p[0-9]+.b
 **	b.(any|none)	\.L[0-9]+
 **	...
 */
@@ -59,7 +56,6 @@ void f3 ()
 ** f4:
 **	...
 **	cmpne	p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0
-**	ptest	p[0-9]+, p[0-9]+.b
 **	b.(any|none)	\.L[0-9]+
 **	...
 */
@@ -76,7 +72,6 @@ void f4 ()
 ** f5:
 **	...
 **	cmplt	p[0-9]+.s, p7/z, z[0-9]+.s, #0
-**	ptest	p[0-9]+, p[0-9]+.b
 **	b.(any|none)	.L[0-9]+
 **	...
 */
@@ -93,7 +88,6 @@ void f5 ()
 ** f6:
 **	...
 **	cmple	p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0
-**	ptest	p[0-9]+, p[0-9]+.b
 **	b.(any|none)	\.L[0-9]+
 **	...
 */

Reply via email to