In the example void f1 () { for (int i = 0; i < N; i++) { b[i] += a[i]; if (a[i] > 0) break; } }
when compiled for SVE we generate:

  ld1w    z28.s, p7/z, [x4, x0, lsl 2]
  cmpgt   p14.s, p7/z, z28.s, #0
  ptest   p15, p14.b
  b.none  .L3

where the ptest isn't needed since the branch only cares about the Z and NZ flags.

GCC today supports eliding this through the pattern *cmp<cmp_op><mode>_ptest; however, this pattern only supports the removal when the outermost context is a CMP where the predicate is inside the condition itself.

This typically only happens for an unpredicated CMP, as a ptrue will be generated during expand.

In the case above, at the GIMPLE level we have

  mask_patt_14.15_57 = vect__2.11_52 > { 0, ... };
  vec_mask_and_58 = loop_mask_48 & mask_patt_14.15_57;
  if (vec_mask_and_58 != { 0, ... })
    goto <bb 5>; [5.50%]
  else
    goto <bb 6>; [94.50%]

where the loop mask is applied to the compare as an AND.

The loop mask is moved into the compare by the pattern *cmp<cmp_op><mode>_and, which moves the mask inside if the current mask is a ptrue, since p && true -> p.

However, this happens after combine, and so we can't both move the predicate inside AND eliminate the ptests.

To fix this, this patch adds a new pattern *cmp<cmp_op><mode>_and_ptest, which combines these two patterns together, allowing us to both push the predicate inside and eliminate the ptest.

After this patch we generate

  ld1w    z28.s, p7/z, [x4, x0, lsl 2]
  cmpgt   p14.s, p7/z, z28.s, #0
  b.none  .L3

Bootstrapped and regtested on aarch64-none-linux-gnu with no issues.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

	PR target/118974
	* config/aarch64/aarch64-sve.md (*cmp<cmp_op><mode>_and_ptest): New.

gcc/testsuite/ChangeLog:

	PR target/118974
	* gcc.target/aarch64/sve/pr119351.c: Update codegen.
	* gcc.target/aarch64/sve/vect-early-break-cbranch.c: Likewise.
--- diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md index bf7569f932b6d7392b9c4fb7b94efafb6fd184c2..fe7f52ee1ed400b4eda28e3f90edc0044a5aa7a9 100644 --- a/gcc/config/aarch64/aarch64-sve.md +++ b/gcc/config/aarch64/aarch64-sve.md @@ -8319,6 +8319,40 @@ (define_insn_and_rewrite "*cmp<cmp_op><mode>_ptest" } ) +;; Predicated integer comparisons, formed by combining a PTRUE-predicated +;; comparison with an AND in which only the flags result is interesting. +(define_insn_and_rewrite "*cmp<cmp_op><mode>_and_ptest" + [(set (reg:CC_NZC CC_REGNUM) + (unspec:CC_NZC + [(match_operand:VNx16BI 1 "register_operand") + (match_operand 4) + (const_int SVE_KNOWN_PTRUE) + (and:<VPRED> + (unspec:<VPRED> + [(match_operand 5) + (const_int SVE_KNOWN_PTRUE) + (SVE_INT_CMP:<VPRED> + (match_operand:SVE_I 2 "register_operand") + (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand"))] + UNSPEC_PRED_Z) + (match_operand:<VPRED> 6 "register_operand"))] + UNSPEC_PTEST)) + (clobber (match_scratch:<VPRED> 0))] + "TARGET_SVE" + {@ [ cons: =0, 1 , 2 , 3 ; attrs: pred_clobber ] + [ &Upa , Upl, w , <sve_imm_con>; yes ] cmp<cmp_op>\t%0.<Vetype>, %6/z, %2.<Vetype>, #%3 + [ ?Upl , 0 , w , <sve_imm_con>; yes ] ^ + [ Upa , Upl, w , <sve_imm_con>; no ] ^ + [ &Upa , Upl, w , w ; yes ] cmp<cmp_op>\t%0.<Vetype>, %6/z, %2.<Vetype>, %3.<Vetype> + [ ?Upl , 0 , w , w ; yes ] ^ + [ Upa , Upl, w , w ; no ] ^ + } + "&& !rtx_equal_p (operands[4], operands[5])" + { + operands[5] = copy_rtx (operands[4]); + } +) + ;; Predicated integer comparisons, formed by combining a PTRUE-predicated ;; comparison with an AND. 
Split the instruction into its preferred form ;; at the earliest opportunity, in order to get rid of the redundant diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr119351.c b/gcc/testsuite/gcc.target/aarch64/sve/pr119351.c index 85aab355f95f83e1fa65d280f14fb8ade7f7e658..1ebc735a82f4a59d8eccff39346e46a449b4729a 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/pr119351.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/pr119351.c @@ -14,7 +14,6 @@ int x[N] __attribute__((aligned(32))); ** ... ** ld1w z[0-9]+.s, p[0-9]+/z, \[x[0-9], x[0-9], lsl 2\] ** cmple p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0 -** ptest p[0-9]+, p[0-9]+.b ** ... */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vect-early-break-cbranch.c b/gcc/testsuite/gcc.target/aarch64/sve/vect-early-break-cbranch.c index d7cef1105410be04ed67d1d3b800746267f205a8..8bd6fafc4d4248cf0acf7dfa2f07cd005f13de35 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/vect-early-break-cbranch.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/vect-early-break-cbranch.c @@ -8,7 +8,6 @@ int b[N] = {0}; ** f1: ** ... ** cmpgt p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0 -** ptest p[0-9]+, p[0-9]+.b ** b.(any|none) \.L[0-9]+ ** ... */ @@ -25,7 +24,6 @@ void f1 () ** f2: ** ... ** cmpge p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0 -** ptest p[0-9]+, p[0-9]+.b ** b.(any|none) \.L[0-9]+ ** ... */ @@ -42,7 +40,6 @@ void f2 () ** f3: ** ... ** cmpeq p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0 -** ptest p[0-9]+, p[0-9]+.b ** b.(any|none) \.L[0-9]+ ** ... */ @@ -59,7 +56,6 @@ void f3 () ** f4: ** ... ** cmpne p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0 -** ptest p[0-9]+, p[0-9]+.b ** b.(any|none) \.L[0-9]+ ** ... */ @@ -76,7 +72,6 @@ void f4 () ** f5: ** ... ** cmplt p[0-9]+.s, p7/z, z[0-9]+.s, #0 -** ptest p[0-9]+, p[0-9]+.b ** b.(any|none) .L[0-9]+ ** ... */ @@ -93,7 +88,6 @@ void f5 () ** f6: ** ... ** cmple p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0 -** ptest p[0-9]+, p[0-9]+.b ** b.(any|none) \.L[0-9]+ ** ... */ --
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md index bf7569f932b6d7392b9c4fb7b94efafb6fd184c2..fe7f52ee1ed400b4eda28e3f90edc0044a5aa7a9 100644 --- a/gcc/config/aarch64/aarch64-sve.md +++ b/gcc/config/aarch64/aarch64-sve.md @@ -8319,6 +8319,40 @@ (define_insn_and_rewrite "*cmp<cmp_op><mode>_ptest" } ) +;; Predicated integer comparisons, formed by combining a PTRUE-predicated +;; comparison with an AND in which only the flags result is interesting. +(define_insn_and_rewrite "*cmp<cmp_op><mode>_and_ptest" + [(set (reg:CC_NZC CC_REGNUM) + (unspec:CC_NZC + [(match_operand:VNx16BI 1 "register_operand") + (match_operand 4) + (const_int SVE_KNOWN_PTRUE) + (and:<VPRED> + (unspec:<VPRED> + [(match_operand 5) + (const_int SVE_KNOWN_PTRUE) + (SVE_INT_CMP:<VPRED> + (match_operand:SVE_I 2 "register_operand") + (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand"))] + UNSPEC_PRED_Z) + (match_operand:<VPRED> 6 "register_operand"))] + UNSPEC_PTEST)) + (clobber (match_scratch:<VPRED> 0))] + "TARGET_SVE" + {@ [ cons: =0, 1 , 2 , 3 ; attrs: pred_clobber ] + [ &Upa , Upl, w , <sve_imm_con>; yes ] cmp<cmp_op>\t%0.<Vetype>, %6/z, %2.<Vetype>, #%3 + [ ?Upl , 0 , w , <sve_imm_con>; yes ] ^ + [ Upa , Upl, w , <sve_imm_con>; no ] ^ + [ &Upa , Upl, w , w ; yes ] cmp<cmp_op>\t%0.<Vetype>, %6/z, %2.<Vetype>, %3.<Vetype> + [ ?Upl , 0 , w , w ; yes ] ^ + [ Upa , Upl, w , w ; no ] ^ + } + "&& !rtx_equal_p (operands[4], operands[5])" + { + operands[5] = copy_rtx (operands[4]); + } +) + ;; Predicated integer comparisons, formed by combining a PTRUE-predicated ;; comparison with an AND. 
Split the instruction into its preferred form ;; at the earliest opportunity, in order to get rid of the redundant diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr119351.c b/gcc/testsuite/gcc.target/aarch64/sve/pr119351.c index 85aab355f95f83e1fa65d280f14fb8ade7f7e658..1ebc735a82f4a59d8eccff39346e46a449b4729a 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/pr119351.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/pr119351.c @@ -14,7 +14,6 @@ int x[N] __attribute__((aligned(32))); ** ... ** ld1w z[0-9]+.s, p[0-9]+/z, \[x[0-9], x[0-9], lsl 2\] ** cmple p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0 -** ptest p[0-9]+, p[0-9]+.b ** ... */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vect-early-break-cbranch.c b/gcc/testsuite/gcc.target/aarch64/sve/vect-early-break-cbranch.c index d7cef1105410be04ed67d1d3b800746267f205a8..8bd6fafc4d4248cf0acf7dfa2f07cd005f13de35 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/vect-early-break-cbranch.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/vect-early-break-cbranch.c @@ -8,7 +8,6 @@ int b[N] = {0}; ** f1: ** ... ** cmpgt p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0 -** ptest p[0-9]+, p[0-9]+.b ** b.(any|none) \.L[0-9]+ ** ... */ @@ -25,7 +24,6 @@ void f1 () ** f2: ** ... ** cmpge p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0 -** ptest p[0-9]+, p[0-9]+.b ** b.(any|none) \.L[0-9]+ ** ... */ @@ -42,7 +40,6 @@ void f2 () ** f3: ** ... ** cmpeq p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0 -** ptest p[0-9]+, p[0-9]+.b ** b.(any|none) \.L[0-9]+ ** ... */ @@ -59,7 +56,6 @@ void f3 () ** f4: ** ... ** cmpne p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0 -** ptest p[0-9]+, p[0-9]+.b ** b.(any|none) \.L[0-9]+ ** ... */ @@ -76,7 +72,6 @@ void f4 () ** f5: ** ... ** cmplt p[0-9]+.s, p7/z, z[0-9]+.s, #0 -** ptest p[0-9]+, p[0-9]+.b ** b.(any|none) .L[0-9]+ ** ... */ @@ -93,7 +88,6 @@ void f5 () ** f6: ** ... ** cmple p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0 -** ptest p[0-9]+, p[0-9]+.b ** b.(any|none) \.L[0-9]+ ** ... */