When doing boolean reductions on Adv. SIMD vectors and SVE is available,
we can use SVE instructions instead of Adv. SIMD ones to do the reduction.
For instance, with Adv. SIMD an OR-reduction is currently generated as:
umaxp v3.4s, v3.4s, v3.4s
fmov x1, d3
cmp x1, 0
cset w0, ne
and with SVE we generate:
ptrue p1.b, vl16
cmpne p1.b, p1/z, z3.b, #0
cset w0, any
The ptrue is normally executed much earlier, so it is not a bottleneck for
the compare.
For the remaining codegen see test vect-reduc-bool-18.c.
Bootstrapped and regtested on aarch64-none-linux-gnu with no issues.
Ok for master?
Thanks,
Tamar
gcc/ChangeLog:
* config/aarch64/aarch64-simd.md (reduc_sbool_and_scal_<mode>,
reduc_sbool_ior_scal_<mode>, reduc_sbool_xor_scal_<mode>): Use SVE if
available.
* config/aarch64/aarch64-sve.md (*cmp<cmp_op><mode>_ptest): Rename ...
(@aarch64_pred_cmp<cmp_op><mode>_ptest): ... To this.
(reduc_sbool_xor_scal_<mode>): Rename ...
(@reduc_sbool_xor_scal_<mode>): ... To this.
gcc/testsuite/ChangeLog:
* gcc.target/aarch64/sve/vect-reduc-bool-10.c: New test.
* gcc.target/aarch64/sve/vect-reduc-bool-11.c: New test.
* gcc.target/aarch64/sve/vect-reduc-bool-12.c: New test.
* gcc.target/aarch64/sve/vect-reduc-bool-13.c: New test.
* gcc.target/aarch64/sve/vect-reduc-bool-14.c: New test.
* gcc.target/aarch64/sve/vect-reduc-bool-15.c: New test.
* gcc.target/aarch64/sve/vect-reduc-bool-16.c: New test.
* gcc.target/aarch64/sve/vect-reduc-bool-17.c: New test.
* gcc.target/aarch64/sve/vect-reduc-bool-18.c: New test.
---
diff --git a/gcc/config/aarch64/aarch64-simd.md
b/gcc/config/aarch64/aarch64-simd.md
index
5eddc05b5749bbd080a085db2e15dbb9bbce3be3..87c2dc84bfe45f29150980117a8e4c180a0cd02b
100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -3478,6 +3478,29 @@ (define_expand "reduc_sbool_and_scal_<mode>"
UNSPEC_ANDV))]
"TARGET_SIMD"
{
+ if (TARGET_SVE)
+ {
+ machine_mode full_mode = aarch64_full_sve_mode (<VEL>mode).require ();
+ rtx in = force_lowpart_subreg (full_mode, operands[1], <MODE>mode);
+ unsigned lanes
+ = exact_div (GET_MODE_BITSIZE (<MODE>mode), 8).to_constant ();
+ machine_mode pred_mode = aarch64_sve_pred_mode (full_mode);
+ rtx pred_res = gen_reg_rtx (pred_mode);
+ rtx gp = aarch64_ptrue_reg (VNx16BImode, lanes);
+ rtx cast_gp = lowpart_subreg (pred_mode, gp, VNx16BImode);
+ rtx gp_flag = gen_int_mode (SVE_MAYBE_NOT_PTRUE, SImode);
+ emit_insn (
+ gen_aarch64_pred_cmp_ptest (EQ, full_mode, pred_res, gp, in,
+ CONST0_RTX (full_mode), cast_gp,
+ gp_flag, cast_gp, gp_flag));
+ rtx cc_reg = gen_rtx_REG (CC_NZCmode, CC_REGNUM);
+ rtx cmp = gen_rtx_fmt_ee (EQ, SImode, cc_reg, const0_rtx);
+ rtx tmp2 = gen_reg_rtx (SImode);
+ emit_insn (gen_aarch64_cstoresi (tmp2, cmp, cc_reg));
+ emit_move_insn (operands[0], gen_lowpart (QImode, tmp2));
+ DONE;
+ }
+
rtx tmp = operands[1];
/* For 64-bit vectors we need no reductions. */
if (known_eq (128, GET_MODE_BITSIZE (<MODE>mode)))
@@ -3507,6 +3530,29 @@ (define_expand "reduc_sbool_ior_scal_<mode>"
UNSPEC_IORV))]
"TARGET_SIMD"
{
+ if (TARGET_SVE)
+ {
+ machine_mode full_mode = aarch64_full_sve_mode (<VEL>mode).require ();
+ rtx in = force_lowpart_subreg (full_mode, operands[1], <MODE>mode);
+ unsigned lanes
+ = exact_div (GET_MODE_BITSIZE (<MODE>mode), 8).to_constant ();
+ machine_mode pred_mode = aarch64_sve_pred_mode (full_mode);
+ rtx pred_res = gen_reg_rtx (pred_mode);
+ rtx gp = aarch64_ptrue_reg (VNx16BImode, lanes);
+ rtx cast_gp = lowpart_subreg (pred_mode, gp, VNx16BImode);
+ rtx gp_flag = gen_int_mode (SVE_MAYBE_NOT_PTRUE, SImode);
+ emit_insn (
+ gen_aarch64_pred_cmp_ptest (NE, full_mode, pred_res, gp, in,
+ CONST0_RTX (full_mode), cast_gp,
+ gp_flag, cast_gp, gp_flag));
+ rtx cc_reg = gen_rtx_REG (CC_NZCmode, CC_REGNUM);
+ rtx cmp = gen_rtx_fmt_ee (NE, SImode, cc_reg, const0_rtx);
+ rtx tmp2 = gen_reg_rtx (SImode);
+ emit_insn (gen_aarch64_cstoresi (tmp2, cmp, cc_reg));
+ emit_move_insn (operands[0], gen_lowpart (QImode, tmp2));
+ DONE;
+ }
+
rtx tmp = operands[1];
/* For 64-bit vectors we need no reductions. */
if (known_eq (128, GET_MODE_BITSIZE (<MODE>mode)))
@@ -3537,6 +3583,24 @@ (define_expand "reduc_sbool_xor_scal_<mode>"
UNSPEC_XORV))]
"TARGET_SIMD"
{
+ if (TARGET_SVE)
+ {
+ machine_mode full_mode = aarch64_full_sve_mode (<VEL>mode).require ();
+ rtx in = force_lowpart_subreg (full_mode, operands[1], <MODE>mode);
+ unsigned lanes
+ = exact_div (GET_MODE_BITSIZE (<MODE>mode), 8).to_constant ();
+ machine_mode pred_mode = aarch64_sve_pred_mode (full_mode);
+ rtx pred_res = gen_reg_rtx (pred_mode);
+ rtx gp = aarch64_ptrue_reg (VNx16BImode, lanes);
+ rtx cast_gp = lowpart_subreg (pred_mode, gp, VNx16BImode);
+ rtx gp_flag = gen_int_mode (SVE_MAYBE_NOT_PTRUE, SImode);
+ emit_insn (
+ gen_aarch64_pred_cmp (NE, full_mode, pred_res, cast_gp, gp_flag, in,
+ CONST0_RTX (full_mode)));
+ emit_insn (gen_reduc_sbool_xor_scal (pred_mode, operands[0], pred_res));
+ DONE;
+ }
+
rtx tmp = gen_reg_rtx (<MODE>mode);
rtx one_reg = force_reg (<MODE>mode, CONST1_RTX (<MODE>mode));
emit_move_insn (tmp, gen_rtx_AND (<MODE>mode, operands[1], one_reg));
diff --git a/gcc/config/aarch64/aarch64-sve.md
b/gcc/config/aarch64/aarch64-sve.md
index
808a0e8bc00d1c8e79d987c83d542211fc25f8e0..3e6672948b14aaed8138b1852d997c29b8a5272f
100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -8745,7 +8745,7 @@ (define_insn_and_rewrite "*cmp<cmp_op><mode>_acle_cc"
;; Predicated integer comparisons in which only the flags result is
;; interesting.
-(define_insn_and_rewrite "*cmp<cmp_op><mode>_ptest"
+(define_insn_and_rewrite "@aarch64_pred_cmp<cmp_op><mode>_ptest"
[(set (reg:CC_NZC CC_REGNUM)
(unspec:CC_NZC
[(match_operand:VNx16BI 1 "register_operand")
@@ -9963,7 +9963,7 @@ (define_expand "reduc_sbool_ior_scal_<mode>"
;; cntp x0, p0, p0.b
;; and w0, w0, 1
;;
-(define_expand "reduc_sbool_xor_scal_<mode>"
+(define_expand "@reduc_sbool_xor_scal_<mode>"
[(set (match_dup 2)
(zero_extend:DI
(unspec:SI [(match_dup 1)
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-10.c
b/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-10.c
new file mode 100644
index
0000000000000000000000000000000000000000..c0ff50e914b79b36821725ab71474a919f6d22ce
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-10.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-require-effective-target aarch64_sve_hw } */
+/* { dg-options "-O3 -march=armv8-a+sve -mautovec-preference=asimd-only
-fdump-tree-vect-details" }*/
+
+char p[128];
+
+bool __attribute__((noipa))
+fand (int n)
+{
+ bool r = true;
+ for (int i = 0; i < n; ++i)
+ r &= (p[i] != 0);
+ return r;
+}
+
+bool __attribute__((noipa))
+fior (int n)
+{
+ bool r = false;
+ for (int i = 0; i < n; ++i)
+ r |= (p[i] != 0);
+ return r;
+}
+
+int main()
+{
+ __builtin_memset (p, 1, sizeof(p));
+
+ for (int n = 0; n < 77; ++n)
+ if (!fand (n))
+ __builtin_abort ();
+
+ p[0] = 0;
+ for (int n = 1; n < 77; ++n)
+ if (fand (n))
+ __builtin_abort ();
+
+ __builtin_memset (p, 0, sizeof(p));
+
+ for (int n = 0; n < 77; ++n)
+ if (fior (n))
+ __builtin_abort ();
+
+ p[0] = 1;
+ for (int n = 1; n < 77; ++n)
+ if (!fior (n))
+ __builtin_abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 2 "vect" } }
*/
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-11.c
b/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-11.c
new file mode 100644
index
0000000000000000000000000000000000000000..3597fc4a456c1186e257a188dd79888cd6466e56
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-11.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-require-effective-target aarch64_sve_hw } */
+/* { dg-options "-O3 -march=armv8-a+sve -mautovec-preference=asimd-only
-fdump-tree-vect-details" }*/
+
+short p[128];
+
+bool __attribute__((noipa))
+fand (int n)
+{
+ bool r = true;
+ for (int i = 0; i < n; ++i)
+ r &= (p[i] != 0);
+ return r;
+}
+
+bool __attribute__((noipa))
+fior (int n)
+{
+ bool r = false;
+ for (int i = 0; i < n; ++i)
+ r |= (p[i] != 0);
+ return r;
+}
+
+int main()
+{
+ __builtin_memset (p, 1, sizeof(p));
+
+ for (int n = 0; n < 77; ++n)
+ if (!fand (n))
+ __builtin_abort ();
+
+ p[0] = 0;
+ for (int n = 1; n < 77; ++n)
+ if (fand (n))
+ __builtin_abort ();
+
+ __builtin_memset (p, 0, sizeof(p));
+
+ for (int n = 0; n < 77; ++n)
+ if (fior (n))
+ __builtin_abort ();
+
+ p[0] = 1;
+ for (int n = 1; n < 77; ++n)
+ if (!fior (n))
+ __builtin_abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 2 "vect" } }
*/
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-12.c
b/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-12.c
new file mode 100644
index
0000000000000000000000000000000000000000..b1173627403d654bc0e64bc141546f8f3889a8f6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-12.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-require-effective-target aarch64_sve_hw } */
+/* { dg-options "-O3 -march=armv8-a+sve -mautovec-preference=asimd-only
-fdump-tree-vect-details" }*/
+
+int p[128];
+
+bool __attribute__((noipa))
+fand (int n)
+{
+ bool r = true;
+ for (int i = 0; i < n; ++i)
+ r &= (p[i] != 0);
+ return r;
+}
+
+bool __attribute__((noipa))
+fior (int n)
+{
+ bool r = false;
+ for (int i = 0; i < n; ++i)
+ r |= (p[i] != 0);
+ return r;
+}
+
+int main()
+{
+ __builtin_memset (p, 1, sizeof(p));
+
+ for (int n = 0; n < 77; ++n)
+ if (!fand (n))
+ __builtin_abort ();
+
+ p[0] = 0;
+ for (int n = 1; n < 77; ++n)
+ if (fand (n))
+ __builtin_abort ();
+
+ __builtin_memset (p, 0, sizeof(p));
+
+ for (int n = 0; n < 77; ++n)
+ if (fior (n))
+ __builtin_abort ();
+
+ p[0] = 1;
+ for (int n = 1; n < 77; ++n)
+ if (!fior (n))
+ __builtin_abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 2 "vect" } }
*/
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-13.c
b/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-13.c
new file mode 100644
index
0000000000000000000000000000000000000000..a2b8a712043459096fdf3357beb7e30467c959a2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-13.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-require-effective-target aarch64_sve_hw } */
+/* { dg-options "-O3 -march=armv8-a+sve -mautovec-preference=asimd-only
-fdump-tree-vect-details" }*/
+
+long long p[128];
+
+bool __attribute__((noipa))
+fand (int n)
+{
+ bool r = true;
+ for (int i = 0; i < n; ++i)
+ r &= (p[i] != 0);
+ return r;
+}
+
+bool __attribute__((noipa))
+fior (int n)
+{
+ bool r = false;
+ for (int i = 0; i < n; ++i)
+ r |= (p[i] != 0);
+ return r;
+}
+
+int main()
+{
+ __builtin_memset (p, 1, sizeof(p));
+
+ for (int n = 0; n < 77; ++n)
+ if (!fand (n))
+ __builtin_abort ();
+
+ p[0] = 0;
+ for (int n = 1; n < 77; ++n)
+ if (fand (n))
+ __builtin_abort ();
+
+ __builtin_memset (p, 0, sizeof(p));
+
+ for (int n = 0; n < 77; ++n)
+ if (fior (n))
+ __builtin_abort ();
+
+ p[0] = 1;
+ for (int n = 1; n < 77; ++n)
+ if (!fior (n))
+ __builtin_abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 2 "vect" } }
*/
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-14.c
b/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-14.c
new file mode 100644
index
0000000000000000000000000000000000000000..c24e13294fe26daed85fc390ee1e647ea2d80506
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-14.c
@@ -0,0 +1,50 @@
+/* { dg-do run } */
+/* { dg-require-effective-target aarch64_sve_hw } */
+/* { dg-options "-O3 -march=armv8-a+sve -mautovec-preference=asimd-only
-fdump-tree-vect-details" }*/
+
+char p[128];
+
+bool __attribute__((noipa))
+fxort (int n)
+{
+ bool r = true;
+ for (int i = 0; i < n; ++i)
+ r ^= (p[i] != 0);
+ return r;
+}
+
+bool __attribute__((noipa))
+fxorf (int n)
+{
+ bool r = false;
+ for (int i = 0; i < n; ++i)
+ r ^= (p[i] != 0);
+ return r;
+}
+
+int main()
+{
+ __builtin_memset (p, 1, sizeof(p));
+
+ for (int n = 0; n < 77; ++n)
+ if (fxort (n) != !(n & 1))
+ __builtin_abort ();
+
+ for (int n = 0; n < 77; ++n)
+ if (fxorf (n) != (n & 1))
+ __builtin_abort ();
+
+ __builtin_memset (p, 0, sizeof(p));
+
+ for (int n = 0; n < 77; ++n)
+ if (!fxort (n))
+ __builtin_abort ();
+
+ for (int n = 0; n < 77; ++n)
+ if (fxorf (n))
+ __builtin_abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 2 "vect" } }
*/
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-15.c
b/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-15.c
new file mode 100644
index
0000000000000000000000000000000000000000..0233b8ae2338fe0c3727f5ae561699e520602d61
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-15.c
@@ -0,0 +1,50 @@
+/* { dg-do run } */
+/* { dg-require-effective-target aarch64_sve_hw } */
+/* { dg-options "-O3 -march=armv8-a+sve -mautovec-preference=asimd-only
-fdump-tree-vect-details" }*/
+
+short p[128];
+
+bool __attribute__((noipa))
+fxort (int n)
+{
+ bool r = true;
+ for (int i = 0; i < n; ++i)
+ r ^= (p[i] != 0);
+ return r;
+}
+
+bool __attribute__((noipa))
+fxorf (int n)
+{
+ bool r = false;
+ for (int i = 0; i < n; ++i)
+ r ^= (p[i] != 0);
+ return r;
+}
+
+int main()
+{
+ __builtin_memset (p, 1, sizeof(p));
+
+ for (int n = 0; n < 77; ++n)
+ if (fxort (n) != !(n & 1))
+ __builtin_abort ();
+
+ for (int n = 0; n < 77; ++n)
+ if (fxorf (n) != (n & 1))
+ __builtin_abort ();
+
+ __builtin_memset (p, 0, sizeof(p));
+
+ for (int n = 0; n < 77; ++n)
+ if (!fxort (n))
+ __builtin_abort ();
+
+ for (int n = 0; n < 77; ++n)
+ if (fxorf (n))
+ __builtin_abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 2 "vect" } }
*/
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-16.c
b/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-16.c
new file mode 100644
index
0000000000000000000000000000000000000000..e731b556424f022a218a484f0c22d2d8a7f79c62
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-16.c
@@ -0,0 +1,50 @@
+/* { dg-do run } */
+/* { dg-require-effective-target aarch64_sve_hw } */
+/* { dg-options "-O3 -march=armv8-a+sve -mautovec-preference=asimd-only
-fdump-tree-vect-details" }*/
+
+int p[128];
+
+bool __attribute__((noipa))
+fxort (int n)
+{
+ bool r = true;
+ for (int i = 0; i < n; ++i)
+ r ^= (p[i] != 0);
+ return r;
+}
+
+bool __attribute__((noipa))
+fxorf (int n)
+{
+ bool r = false;
+ for (int i = 0; i < n; ++i)
+ r ^= (p[i] != 0);
+ return r;
+}
+
+int main()
+{
+ __builtin_memset (p, 1, sizeof(p));
+
+ for (int n = 0; n < 77; ++n)
+ if (fxort (n) != !(n & 1))
+ __builtin_abort ();
+
+ for (int n = 0; n < 77; ++n)
+ if (fxorf (n) != (n & 1))
+ __builtin_abort ();
+
+ __builtin_memset (p, 0, sizeof(p));
+
+ for (int n = 0; n < 77; ++n)
+ if (!fxort (n))
+ __builtin_abort ();
+
+ for (int n = 0; n < 77; ++n)
+ if (fxorf (n))
+ __builtin_abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 2 "vect" } }
*/
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-17.c
b/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-17.c
new file mode 100644
index
0000000000000000000000000000000000000000..efbec019bf7ad3e71b7ade7940e0d796142694d6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-17.c
@@ -0,0 +1,50 @@
+/* { dg-do run } */
+/* { dg-require-effective-target aarch64_sve_hw } */
+/* { dg-options "-O3 -march=armv8-a+sve -mautovec-preference=asimd-only
-fdump-tree-vect-details" }*/
+
+long long p[128];
+
+bool __attribute__((noipa))
+fxort (int n)
+{
+ bool r = true;
+ for (int i = 0; i < n; ++i)
+ r ^= (p[i] != 0);
+ return r;
+}
+
+bool __attribute__((noipa))
+fxorf (int n)
+{
+ bool r = false;
+ for (int i = 0; i < n; ++i)
+ r ^= (p[i] != 0);
+ return r;
+}
+
+int main()
+{
+ __builtin_memset (p, 1, sizeof(p));
+
+ for (int n = 0; n < 77; ++n)
+ if (fxort (n) != !(n & 1))
+ __builtin_abort ();
+
+ for (int n = 0; n < 77; ++n)
+ if (fxorf (n) != (n & 1))
+ __builtin_abort ();
+
+ __builtin_memset (p, 0, sizeof(p));
+
+ for (int n = 0; n < 77; ++n)
+ if (!fxort (n))
+ __builtin_abort ();
+
+ for (int n = 0; n < 77; ++n)
+ if (fxorf (n))
+ __builtin_abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 2 "vect" {
target { vect_int && vect_condition } } } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-18.c
b/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-18.c
new file mode 100644
index
0000000000000000000000000000000000000000..a47c306e13c688e6014524bfb939dc46ed65b8a4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-18.c
@@ -0,0 +1,60 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=armv8-a+sve -mautovec-preference=asimd-only
-fno-schedule-insns -fno-reorder-blocks -fno-schedule-insns2
-fdump-tree-vect-details" }*/
+/* { dg-final { check-function-bodies "**" "" } } */
+
+char p[128];
+
+/*
+** fand:
+** ...
+** ptrue p[0-9]+.b, vl16
+** cmpeq p[0-9]+.b, p[0-9]+/z, z[0-9]+.b, #0
+** cset w[0-9]+, none
+** ...
+*/
+bool __attribute__((noipa))
+fand (int n)
+{
+ bool r = true;
+ for (int i = 0; i < n; ++i)
+ r &= (p[i] != 0);
+ return r;
+}
+
+/*
+** fior:
+** ...
+** ptrue p[0-9]+.b, vl16
+** cmpne p[0-9]+.b, p[0-9]+/z, z[0-9]+.b, #0
+** cset w[0-9]+, any
+** ...
+*/
+bool __attribute__((noipa))
+fior (int n)
+{
+ bool r = false;
+ for (int i = 0; i < n; ++i)
+ r |= (p[i] != 0);
+ return r;
+}
+
+/*
+** fxor:
+** ...
+** ptrue p[0-9]+.b, vl16
+** cmpne p[0-9]+.b, p[0-9]+/z, z[0-9]+.b, #0
+** cntp x[0-9]+, p[0-9]+, p[0-9]+.b
+** and w[0-9]+, w[0-9]+, 1
+** ...
+*/
+bool __attribute__((noipa))
+fxor (int n)
+{
+ bool r = false;
+ for (int i = 0; i < n; ++i)
+ r ^= (p[i] != 0);
+ return r;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 3 "vect" } }
*/
+
--
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 5eddc05b5749bbd080a085db2e15dbb9bbce3be3..87c2dc84bfe45f29150980117a8e4c180a0cd02b 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -3478,6 +3478,29 @@ (define_expand "reduc_sbool_and_scal_<mode>"
UNSPEC_ANDV))]
"TARGET_SIMD"
{
+ if (TARGET_SVE)
+ {
+ machine_mode full_mode = aarch64_full_sve_mode (<VEL>mode).require ();
+ rtx in = force_lowpart_subreg (full_mode, operands[1], <MODE>mode);
+ unsigned lanes
+ = exact_div (GET_MODE_BITSIZE (<MODE>mode), 8).to_constant ();
+ machine_mode pred_mode = aarch64_sve_pred_mode (full_mode);
+ rtx pred_res = gen_reg_rtx (pred_mode);
+ rtx gp = aarch64_ptrue_reg (VNx16BImode, lanes);
+ rtx cast_gp = lowpart_subreg (pred_mode, gp, VNx16BImode);
+ rtx gp_flag = gen_int_mode (SVE_MAYBE_NOT_PTRUE, SImode);
+ emit_insn (
+ gen_aarch64_pred_cmp_ptest (EQ, full_mode, pred_res, gp, in,
+ CONST0_RTX (full_mode), cast_gp,
+ gp_flag, cast_gp, gp_flag));
+ rtx cc_reg = gen_rtx_REG (CC_NZCmode, CC_REGNUM);
+ rtx cmp = gen_rtx_fmt_ee (EQ, SImode, cc_reg, const0_rtx);
+ rtx tmp2 = gen_reg_rtx (SImode);
+ emit_insn (gen_aarch64_cstoresi (tmp2, cmp, cc_reg));
+ emit_move_insn (operands[0], gen_lowpart (QImode, tmp2));
+ DONE;
+ }
+
rtx tmp = operands[1];
/* For 64-bit vectors we need no reductions. */
if (known_eq (128, GET_MODE_BITSIZE (<MODE>mode)))
@@ -3507,6 +3530,29 @@ (define_expand "reduc_sbool_ior_scal_<mode>"
UNSPEC_IORV))]
"TARGET_SIMD"
{
+ if (TARGET_SVE)
+ {
+ machine_mode full_mode = aarch64_full_sve_mode (<VEL>mode).require ();
+ rtx in = force_lowpart_subreg (full_mode, operands[1], <MODE>mode);
+ unsigned lanes
+ = exact_div (GET_MODE_BITSIZE (<MODE>mode), 8).to_constant ();
+ machine_mode pred_mode = aarch64_sve_pred_mode (full_mode);
+ rtx pred_res = gen_reg_rtx (pred_mode);
+ rtx gp = aarch64_ptrue_reg (VNx16BImode, lanes);
+ rtx cast_gp = lowpart_subreg (pred_mode, gp, VNx16BImode);
+ rtx gp_flag = gen_int_mode (SVE_MAYBE_NOT_PTRUE, SImode);
+ emit_insn (
+ gen_aarch64_pred_cmp_ptest (NE, full_mode, pred_res, gp, in,
+ CONST0_RTX (full_mode), cast_gp,
+ gp_flag, cast_gp, gp_flag));
+ rtx cc_reg = gen_rtx_REG (CC_NZCmode, CC_REGNUM);
+ rtx cmp = gen_rtx_fmt_ee (NE, SImode, cc_reg, const0_rtx);
+ rtx tmp2 = gen_reg_rtx (SImode);
+ emit_insn (gen_aarch64_cstoresi (tmp2, cmp, cc_reg));
+ emit_move_insn (operands[0], gen_lowpart (QImode, tmp2));
+ DONE;
+ }
+
rtx tmp = operands[1];
/* For 64-bit vectors we need no reductions. */
if (known_eq (128, GET_MODE_BITSIZE (<MODE>mode)))
@@ -3537,6 +3583,24 @@ (define_expand "reduc_sbool_xor_scal_<mode>"
UNSPEC_XORV))]
"TARGET_SIMD"
{
+ if (TARGET_SVE)
+ {
+ machine_mode full_mode = aarch64_full_sve_mode (<VEL>mode).require ();
+ rtx in = force_lowpart_subreg (full_mode, operands[1], <MODE>mode);
+ unsigned lanes
+ = exact_div (GET_MODE_BITSIZE (<MODE>mode), 8).to_constant ();
+ machine_mode pred_mode = aarch64_sve_pred_mode (full_mode);
+ rtx pred_res = gen_reg_rtx (pred_mode);
+ rtx gp = aarch64_ptrue_reg (VNx16BImode, lanes);
+ rtx cast_gp = lowpart_subreg (pred_mode, gp, VNx16BImode);
+ rtx gp_flag = gen_int_mode (SVE_MAYBE_NOT_PTRUE, SImode);
+ emit_insn (
+ gen_aarch64_pred_cmp (NE, full_mode, pred_res, cast_gp, gp_flag, in,
+ CONST0_RTX (full_mode)));
+ emit_insn (gen_reduc_sbool_xor_scal (pred_mode, operands[0], pred_res));
+ DONE;
+ }
+
rtx tmp = gen_reg_rtx (<MODE>mode);
rtx one_reg = force_reg (<MODE>mode, CONST1_RTX (<MODE>mode));
emit_move_insn (tmp, gen_rtx_AND (<MODE>mode, operands[1], one_reg));
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index 808a0e8bc00d1c8e79d987c83d542211fc25f8e0..3e6672948b14aaed8138b1852d997c29b8a5272f 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -8745,7 +8745,7 @@ (define_insn_and_rewrite "*cmp<cmp_op><mode>_acle_cc"
;; Predicated integer comparisons in which only the flags result is
;; interesting.
-(define_insn_and_rewrite "*cmp<cmp_op><mode>_ptest"
+(define_insn_and_rewrite "@aarch64_pred_cmp<cmp_op><mode>_ptest"
[(set (reg:CC_NZC CC_REGNUM)
(unspec:CC_NZC
[(match_operand:VNx16BI 1 "register_operand")
@@ -9963,7 +9963,7 @@ (define_expand "reduc_sbool_ior_scal_<mode>"
;; cntp x0, p0, p0.b
;; and w0, w0, 1
;;
-(define_expand "reduc_sbool_xor_scal_<mode>"
+(define_expand "@reduc_sbool_xor_scal_<mode>"
[(set (match_dup 2)
(zero_extend:DI
(unspec:SI [(match_dup 1)
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-10.c b/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-10.c
new file mode 100644
index 0000000000000000000000000000000000000000..c0ff50e914b79b36821725ab71474a919f6d22ce
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-10.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-require-effective-target aarch64_sve_hw } */
+/* { dg-options "-O3 -march=armv8-a+sve -mautovec-preference=asimd-only -fdump-tree-vect-details" }*/
+
+char p[128];
+
+bool __attribute__((noipa))
+fand (int n)
+{
+ bool r = true;
+ for (int i = 0; i < n; ++i)
+ r &= (p[i] != 0);
+ return r;
+}
+
+bool __attribute__((noipa))
+fior (int n)
+{
+ bool r = false;
+ for (int i = 0; i < n; ++i)
+ r |= (p[i] != 0);
+ return r;
+}
+
+int main()
+{
+ __builtin_memset (p, 1, sizeof(p));
+
+ for (int n = 0; n < 77; ++n)
+ if (!fand (n))
+ __builtin_abort ();
+
+ p[0] = 0;
+ for (int n = 1; n < 77; ++n)
+ if (fand (n))
+ __builtin_abort ();
+
+ __builtin_memset (p, 0, sizeof(p));
+
+ for (int n = 0; n < 77; ++n)
+ if (fior (n))
+ __builtin_abort ();
+
+ p[0] = 1;
+ for (int n = 1; n < 77; ++n)
+ if (!fior (n))
+ __builtin_abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 2 "vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-11.c b/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-11.c
new file mode 100644
index 0000000000000000000000000000000000000000..3597fc4a456c1186e257a188dd79888cd6466e56
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-11.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-require-effective-target aarch64_sve_hw } */
+/* { dg-options "-O3 -march=armv8-a+sve -mautovec-preference=asimd-only -fdump-tree-vect-details" }*/
+
+short p[128];
+
+bool __attribute__((noipa))
+fand (int n)
+{
+ bool r = true;
+ for (int i = 0; i < n; ++i)
+ r &= (p[i] != 0);
+ return r;
+}
+
+bool __attribute__((noipa))
+fior (int n)
+{
+ bool r = false;
+ for (int i = 0; i < n; ++i)
+ r |= (p[i] != 0);
+ return r;
+}
+
+int main()
+{
+ __builtin_memset (p, 1, sizeof(p));
+
+ for (int n = 0; n < 77; ++n)
+ if (!fand (n))
+ __builtin_abort ();
+
+ p[0] = 0;
+ for (int n = 1; n < 77; ++n)
+ if (fand (n))
+ __builtin_abort ();
+
+ __builtin_memset (p, 0, sizeof(p));
+
+ for (int n = 0; n < 77; ++n)
+ if (fior (n))
+ __builtin_abort ();
+
+ p[0] = 1;
+ for (int n = 1; n < 77; ++n)
+ if (!fior (n))
+ __builtin_abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 2 "vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-12.c b/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-12.c
new file mode 100644
index 0000000000000000000000000000000000000000..b1173627403d654bc0e64bc141546f8f3889a8f6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-12.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-require-effective-target aarch64_sve_hw } */
+/* { dg-options "-O3 -march=armv8-a+sve -mautovec-preference=asimd-only -fdump-tree-vect-details" }*/
+
+int p[128];
+
+bool __attribute__((noipa))
+fand (int n)
+{
+ bool r = true;
+ for (int i = 0; i < n; ++i)
+ r &= (p[i] != 0);
+ return r;
+}
+
+bool __attribute__((noipa))
+fior (int n)
+{
+ bool r = false;
+ for (int i = 0; i < n; ++i)
+ r |= (p[i] != 0);
+ return r;
+}
+
+int main()
+{
+ __builtin_memset (p, 1, sizeof(p));
+
+ for (int n = 0; n < 77; ++n)
+ if (!fand (n))
+ __builtin_abort ();
+
+ p[0] = 0;
+ for (int n = 1; n < 77; ++n)
+ if (fand (n))
+ __builtin_abort ();
+
+ __builtin_memset (p, 0, sizeof(p));
+
+ for (int n = 0; n < 77; ++n)
+ if (fior (n))
+ __builtin_abort ();
+
+ p[0] = 1;
+ for (int n = 1; n < 77; ++n)
+ if (!fior (n))
+ __builtin_abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 2 "vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-13.c b/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-13.c
new file mode 100644
index 0000000000000000000000000000000000000000..a2b8a712043459096fdf3357beb7e30467c959a2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-13.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-require-effective-target aarch64_sve_hw } */
+/* { dg-options "-O3 -march=armv8-a+sve -mautovec-preference=asimd-only -fdump-tree-vect-details" }*/
+
+long long p[128];
+
+bool __attribute__((noipa))
+fand (int n)
+{
+ bool r = true;
+ for (int i = 0; i < n; ++i)
+ r &= (p[i] != 0);
+ return r;
+}
+
+bool __attribute__((noipa))
+fior (int n)
+{
+ bool r = false;
+ for (int i = 0; i < n; ++i)
+ r |= (p[i] != 0);
+ return r;
+}
+
+int main()
+{
+ __builtin_memset (p, 1, sizeof(p));
+
+ for (int n = 0; n < 77; ++n)
+ if (!fand (n))
+ __builtin_abort ();
+
+ p[0] = 0;
+ for (int n = 1; n < 77; ++n)
+ if (fand (n))
+ __builtin_abort ();
+
+ __builtin_memset (p, 0, sizeof(p));
+
+ for (int n = 0; n < 77; ++n)
+ if (fior (n))
+ __builtin_abort ();
+
+ p[0] = 1;
+ for (int n = 1; n < 77; ++n)
+ if (!fior (n))
+ __builtin_abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 2 "vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-14.c b/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-14.c
new file mode 100644
index 0000000000000000000000000000000000000000..c24e13294fe26daed85fc390ee1e647ea2d80506
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-14.c
@@ -0,0 +1,50 @@
+/* { dg-do run } */
+/* { dg-require-effective-target aarch64_sve_hw } */
+/* { dg-options "-O3 -march=armv8-a+sve -mautovec-preference=asimd-only -fdump-tree-vect-details" }*/
+
+char p[128];
+
+bool __attribute__((noipa))
+fxort (int n)
+{
+ bool r = true;
+ for (int i = 0; i < n; ++i)
+ r ^= (p[i] != 0);
+ return r;
+}
+
+bool __attribute__((noipa))
+fxorf (int n)
+{
+ bool r = false;
+ for (int i = 0; i < n; ++i)
+ r ^= (p[i] != 0);
+ return r;
+}
+
+int main()
+{
+ __builtin_memset (p, 1, sizeof(p));
+
+ for (int n = 0; n < 77; ++n)
+ if (fxort (n) != !(n & 1))
+ __builtin_abort ();
+
+ for (int n = 0; n < 77; ++n)
+ if (fxorf (n) != (n & 1))
+ __builtin_abort ();
+
+ __builtin_memset (p, 0, sizeof(p));
+
+ for (int n = 0; n < 77; ++n)
+ if (!fxort (n))
+ __builtin_abort ();
+
+ for (int n = 0; n < 77; ++n)
+ if (fxorf (n))
+ __builtin_abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 2 "vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-15.c b/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-15.c
new file mode 100644
index 0000000000000000000000000000000000000000..0233b8ae2338fe0c3727f5ae561699e520602d61
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-15.c
@@ -0,0 +1,50 @@
+/* { dg-do run } */
+/* { dg-require-effective-target aarch64_sve_hw } */
+/* { dg-options "-O3 -march=armv8-a+sve -mautovec-preference=asimd-only -fdump-tree-vect-details" }*/
+
+short p[128];
+
+bool __attribute__((noipa))
+fxort (int n)
+{
+ bool r = true;
+ for (int i = 0; i < n; ++i)
+ r ^= (p[i] != 0);
+ return r;
+}
+
+bool __attribute__((noipa))
+fxorf (int n)
+{
+ bool r = false;
+ for (int i = 0; i < n; ++i)
+ r ^= (p[i] != 0);
+ return r;
+}
+
+int main()
+{
+ __builtin_memset (p, 1, sizeof(p));
+
+ for (int n = 0; n < 77; ++n)
+ if (fxort (n) != !(n & 1))
+ __builtin_abort ();
+
+ for (int n = 0; n < 77; ++n)
+ if (fxorf (n) != (n & 1))
+ __builtin_abort ();
+
+ __builtin_memset (p, 0, sizeof(p));
+
+ for (int n = 0; n < 77; ++n)
+ if (!fxort (n))
+ __builtin_abort ();
+
+ for (int n = 0; n < 77; ++n)
+ if (fxorf (n))
+ __builtin_abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 2 "vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-16.c b/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-16.c
new file mode 100644
index 0000000000000000000000000000000000000000..e731b556424f022a218a484f0c22d2d8a7f79c62
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-16.c
@@ -0,0 +1,50 @@
+/* { dg-do run } */
+/* { dg-require-effective-target aarch64_sve_hw } */
+/* { dg-options "-O3 -march=armv8-a+sve -mautovec-preference=asimd-only -fdump-tree-vect-details" }*/
+
+int p[128];
+
+bool __attribute__((noipa))
+fxort (int n)
+{
+ bool r = true;
+ for (int i = 0; i < n; ++i)
+ r ^= (p[i] != 0);
+ return r;
+}
+
+bool __attribute__((noipa))
+fxorf (int n)
+{
+ bool r = false;
+ for (int i = 0; i < n; ++i)
+ r ^= (p[i] != 0);
+ return r;
+}
+
+int main()
+{
+ __builtin_memset (p, 1, sizeof(p));
+
+ for (int n = 0; n < 77; ++n)
+ if (fxort (n) != !(n & 1))
+ __builtin_abort ();
+
+ for (int n = 0; n < 77; ++n)
+ if (fxorf (n) != (n & 1))
+ __builtin_abort ();
+
+ __builtin_memset (p, 0, sizeof(p));
+
+ for (int n = 0; n < 77; ++n)
+ if (!fxort (n))
+ __builtin_abort ();
+
+ for (int n = 0; n < 77; ++n)
+ if (fxorf (n))
+ __builtin_abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 2 "vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-17.c b/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-17.c
new file mode 100644
index 0000000000000000000000000000000000000000..efbec019bf7ad3e71b7ade7940e0d796142694d6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-17.c
@@ -0,0 +1,50 @@
+/* { dg-do run } */
+/* { dg-require-effective-target aarch64_sve_hw } */
+/* { dg-options "-O3 -march=armv8-a+sve -mautovec-preference=asimd-only -fdump-tree-vect-details" }*/
+
+long long p[128];
+
+bool __attribute__((noipa))
+fxort (int n)
+{
+ bool r = true;
+ for (int i = 0; i < n; ++i)
+ r ^= (p[i] != 0);
+ return r;
+}
+
+bool __attribute__((noipa))
+fxorf (int n)
+{
+ bool r = false;
+ for (int i = 0; i < n; ++i)
+ r ^= (p[i] != 0);
+ return r;
+}
+
+int main()
+{
+ __builtin_memset (p, 1, sizeof(p));
+
+ for (int n = 0; n < 77; ++n)
+ if (fxort (n) != !(n & 1))
+ __builtin_abort ();
+
+ for (int n = 0; n < 77; ++n)
+ if (fxorf (n) != (n & 1))
+ __builtin_abort ();
+
+ __builtin_memset (p, 0, sizeof(p));
+
+ for (int n = 0; n < 77; ++n)
+ if (!fxort (n))
+ __builtin_abort ();
+
+ for (int n = 0; n < 77; ++n)
+ if (fxorf (n))
+ __builtin_abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 2 "vect" { target { vect_int && vect_condition } } } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-18.c b/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-18.c
new file mode 100644
index 0000000000000000000000000000000000000000..a47c306e13c688e6014524bfb939dc46ed65b8a4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-18.c
@@ -0,0 +1,60 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=armv8-a+sve -mautovec-preference=asimd-only -fno-schedule-insns -fno-reorder-blocks -fno-schedule-insns2 -fdump-tree-vect-details" }*/
+/* { dg-final { check-function-bodies "**" "" } } */
+
+char p[128];
+
+/*
+** fand:
+** ...
+** ptrue p[0-9]+.b, vl16
+** cmpeq p[0-9]+.b, p[0-9]+/z, z[0-9]+.b, #0
+** cset w[0-9]+, none
+** ...
+*/
+bool __attribute__((noipa))
+fand (int n)
+{
+ bool r = true;
+ for (int i = 0; i < n; ++i)
+ r &= (p[i] != 0);
+ return r;
+}
+
+/*
+** fior:
+** ...
+** ptrue p[0-9]+.b, vl16
+** cmpne p[0-9]+.b, p[0-9]+/z, z[0-9]+.b, #0
+** cset w[0-9]+, any
+** ...
+*/
+bool __attribute__((noipa))
+fior (int n)
+{
+ bool r = false;
+ for (int i = 0; i < n; ++i)
+ r |= (p[i] != 0);
+ return r;
+}
+
+/*
+** fxor:
+** ...
+** ptrue p[0-9]+.b, vl16
+** cmpne p[0-9]+.b, p[0-9]+/z, z[0-9]+.b, #0
+** cntp x[0-9]+, p[0-9]+, p[0-9]+.b
+** and w[0-9]+, w[0-9]+, 1
+** ...
+*/
+bool __attribute__((noipa))
+fxor (int n)
+{
+ bool r = false;
+ for (int i = 0; i < n; ++i)
+ r ^= (p[i] != 0);
+ return r;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 3 "vect" } } */
+