The vectorizer has learned how to do boolean reductions of masks to a C bool
for the operations OR, XOR and AND.

This implements the new optabs for SVE.

For SVE, the & and | cases use the CC registers:

or_reduc:
        ptest   p0, p0.b
        cset    w0, any

and_reduc:
        ptrue   p3.b, all
        nots    p3.b, p3/z, p0.b
        cset    w0, none

For the ^ case we count the active predicate lanes and check whether
that count is odd.

xor_reduc:
        cntp    x0, p0, p0.b
        and     w0, w0, 1

Bootstrapped and regtested on aarch64-none-linux-gnu with no issues.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

        * config/aarch64/aarch64-sve.md (reduc_sbool_and_scal_<mode>,
        reduc_sbool_ior_scal_<mode>, reduc_sbool_xor_scal_<mode>): New.

gcc/testsuite/ChangeLog:

        * gcc.target/aarch64/sve/vect-reduc-bool-1.c: New test.
        * gcc.target/aarch64/sve/vect-reduc-bool-2.c: New test.
        * gcc.target/aarch64/sve/vect-reduc-bool-3.c: New test.
        * gcc.target/aarch64/sve/vect-reduc-bool-4.c: New test.
        * gcc.target/aarch64/sve/vect-reduc-bool-5.c: New test.
        * gcc.target/aarch64/sve/vect-reduc-bool-6.c: New test.
        * gcc.target/aarch64/sve/vect-reduc-bool-7.c: New test.
        * gcc.target/aarch64/sve/vect-reduc-bool-8.c: New test.
        * gcc.target/aarch64/sve/vect-reduc-bool-9.c: New test.

---
diff --git a/gcc/config/aarch64/aarch64-sve.md 
b/gcc/config/aarch64/aarch64-sve.md
index 
550ff0a3cde65bfe6f6e680c4d490fa027f3a98c..8126eb5a48d85e0399a5d72233778bd1b1151c51
 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -135,6 +135,7 @@
 ;; ---- [INT,FP] Conditional reductions
 ;; ---- [INT] Tree reductions
 ;; ---- [FP] Tree reductions
+;; ---- [Predicate] Tree reductions
 ;; ---- [FP] Left-to-right reductions
 ;;
 ;; == Permutes
@@ -9887,6 +9888,100 @@ (define_insn "@aarch64_pred_reduc_<optab>_<mode>"
   [(set_attr "sve_type" "sve_fp_reduc")]
 )
 
+;; -------------------------------------------------------------------------
+;; ---- [Predicate] Tree reductions
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - IORV
+;; - XORV
+;; - ANDV
+;; -------------------------------------------------------------------------
+
+;; Unpredicated predicate AND tree reductions.
+;; Invert the predicate and issue a ptrue on the inverted predicate and check
+;; that the Zero flag is set.
+;;
+;; ptrue   p3.b, all
+;; nots    p3.b, p3/z, p0.b
+;; cset    w0, none
+;;
+(define_expand "reduc_sbool_and_scal_<mode>"
+  [(set (match_operand:QI 0 "register_operand")
+       (unspec:QI [(match_operand:PRED_ALL 1 "register_operand")]
+                   UNSPEC_ANDV))]
+  "TARGET_SVE"
+  {
+    rtx ptrue = force_reg (VNx16BImode, aarch64_ptrue_all (<data_bytes>));
+    rtx cast_ptrue = gen_lowpart (<MODE>mode, ptrue);
+    rtx tmp = gen_reg_rtx (<MODE>mode);
+    emit_insn (gen_aarch64_pred_one_cmpl_z (<MODE>mode, tmp, cast_ptrue,
+                                           operands[1]));
+    emit_insn (
+      gen_aarch64_ptest<mode> (ptrue, cast_ptrue,
+                              gen_int_mode (SVE_KNOWN_PTRUE, SImode),
+                              tmp));
+    rtx cc_reg = gen_rtx_REG (CC_NZCmode, CC_REGNUM);
+    rtx cmp = gen_rtx_fmt_ee (EQ, SImode, cc_reg, const0_rtx);
+    rtx tmp2 = gen_reg_rtx (SImode);
+    emit_insn (gen_aarch64_cstoresi (tmp2, cmp, cc_reg));
+    emit_move_insn (operands[0], gen_lowpart (QImode, tmp2));
+    DONE;
+  }
+)
+
+;; Unpredicated predicate IOR tree reductions.
+;; We need to make sure the results are in the CC flags, so execute a ptest
+;; on the same predicate.
+;;
+;;   ptest   p0, p0.b
+;;   cset    w0, any
+;;
+(define_expand "reduc_sbool_ior_scal_<mode>"
+  [(set (match_operand:QI 0 "register_operand")
+       (unspec:QI [(match_operand:PRED_ALL 1 "register_operand")]
+                   UNSPEC_IORV))]
+  "TARGET_SVE"
+  {
+    rtx ptrue = lowpart_subreg (VNx16BImode, operands[1], <MODE>mode);
+    emit_insn (
+      gen_aarch64_ptest<mode> (ptrue, operands[1],
+                              gen_int_mode (SVE_MAYBE_NOT_PTRUE, SImode),
+                              operands[1]));
+    rtx cc_reg = gen_rtx_REG (CC_NZCmode, CC_REGNUM);
+    rtx cmp = gen_rtx_fmt_ee (NE, SImode, cc_reg, const0_rtx);
+    rtx tmp = gen_reg_rtx (SImode);
+    emit_insn (gen_aarch64_cstoresi (tmp, cmp, cc_reg));
+    emit_move_insn (operands[0], gen_lowpart (QImode, tmp));
+    DONE;
+  }
+)
+
+;; Unpredicated predicate XOR tree reductions.
+;; Check to see if the number of active lanes in the predicates is a multiple
+;; of 2.  This generates:
+;;
+;;   cntp    x0, p0, p0.b
+;;   and     w0, w0, 1
+;;
+(define_expand "reduc_sbool_xor_scal_<mode>"
+  [(set (match_dup 2)
+       (zero_extend:DI
+         (unspec:SI [(match_dup 1)
+                     (const_int SVE_MAYBE_NOT_PTRUE)
+                     (match_operand:PRED_ALL 1 "register_operand")]
+                    UNSPEC_CNTP)))
+    (set (match_dup 4)
+        (and:DI (match_dup 2)
+                (const_int 1)))
+    (set (match_operand:QI 0 "register_operand")
+        (subreg:QI (match_dup 4) 0))]
+  "TARGET_SVE"
+  {
+    operands[2] = gen_reg_rtx (DImode);
+    operands[4] = gen_reg_rtx (DImode);
+  }
+)
+
 ;; -------------------------------------------------------------------------
 ;; ---- [FP] Left-to-right reductions
 ;; -------------------------------------------------------------------------
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-1.c 
b/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-1.c
new file mode 100644
index 
0000000000000000000000000000000000000000..177a7ddeeb0c9d5774f43846466164473543eaf5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-1.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-require-effective-target aarch64_sve_hw } */
+/* { dg-options "-O3 -march=armv8-a+sve -mautovec-preference=sve-only 
-fdump-tree-vect-details" }*/
+
+char p[128];
+
+bool __attribute__((noipa))
+fand (int n)
+{
+  bool r = true;
+  for (int i = 0; i < n; ++i)
+    r &= (p[i] != 0);
+  return r;
+}
+
+bool __attribute__((noipa))
+fior (int n)
+{
+  bool r = false;
+  for (int i = 0; i < n; ++i)
+    r |= (p[i] != 0);
+  return r;
+}
+
+int main()
+{
+  __builtin_memset (p, 1, sizeof(p));
+
+  for (int n = 0; n < 77; ++n)
+    if (!fand (n))
+      __builtin_abort ();
+
+  p[0] = 0;
+  for (int n = 1; n < 77; ++n)
+    if (fand (n))
+      __builtin_abort ();
+
+  __builtin_memset (p, 0, sizeof(p));
+
+  for (int n = 0; n < 77; ++n)
+    if (fior (n))
+      __builtin_abort ();
+
+  p[0] = 1;
+  for (int n = 1; n < 77; ++n)
+    if (!fior (n))
+      __builtin_abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 2 "vect" } } 
*/
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-2.c 
b/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-2.c
new file mode 100644
index 
0000000000000000000000000000000000000000..dd6e3939175f126de133aad2f8ca19be82f34769
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-2.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-require-effective-target aarch64_sve_hw } */
+/* { dg-options "-O3 -march=armv8-a+sve -mautovec-preference=sve-only 
-fdump-tree-vect-details" }*/
+
+short p[128];
+
+bool __attribute__((noipa))
+fand (int n)
+{
+  bool r = true;
+  for (int i = 0; i < n; ++i)
+    r &= (p[i] != 0);
+  return r;
+}
+
+bool __attribute__((noipa))
+fior (int n)
+{
+  bool r = false;
+  for (int i = 0; i < n; ++i)
+    r |= (p[i] != 0);
+  return r;
+}
+
+int main()
+{
+  __builtin_memset (p, 1, sizeof(p));
+
+  for (int n = 0; n < 77; ++n)
+    if (!fand (n))
+      __builtin_abort ();
+
+  p[0] = 0;
+  for (int n = 1; n < 77; ++n)
+    if (fand (n))
+      __builtin_abort ();
+
+  __builtin_memset (p, 0, sizeof(p));
+
+  for (int n = 0; n < 77; ++n)
+    if (fior (n))
+      __builtin_abort ();
+
+  p[0] = 1;
+  for (int n = 1; n < 77; ++n)
+    if (!fior (n))
+      __builtin_abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 2 "vect" } } 
*/
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-3.c 
b/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-3.c
new file mode 100644
index 
0000000000000000000000000000000000000000..cae2ac8c7ee2c0e9d22a9428692c25ad195bd074
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-3.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-require-effective-target aarch64_sve_hw } */
+/* { dg-options "-O3 -march=armv8-a+sve -mautovec-preference=sve-only 
-fdump-tree-vect-details" }*/
+
+int p[128];
+
+bool __attribute__((noipa))
+fand (int n)
+{
+  bool r = true;
+  for (int i = 0; i < n; ++i)
+    r &= (p[i] != 0);
+  return r;
+}
+
+bool __attribute__((noipa))
+fior (int n)
+{
+  bool r = false;
+  for (int i = 0; i < n; ++i)
+    r |= (p[i] != 0);
+  return r;
+}
+
+int main()
+{
+  __builtin_memset (p, 1, sizeof(p));
+
+  for (int n = 0; n < 77; ++n)
+    if (!fand (n))
+      __builtin_abort ();
+
+  p[0] = 0;
+  for (int n = 1; n < 77; ++n)
+    if (fand (n))
+      __builtin_abort ();
+
+  __builtin_memset (p, 0, sizeof(p));
+
+  for (int n = 0; n < 77; ++n)
+    if (fior (n))
+      __builtin_abort ();
+
+  p[0] = 1;
+  for (int n = 1; n < 77; ++n)
+    if (!fior (n))
+      __builtin_abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 2 "vect" } } 
*/
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-4.c 
b/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-4.c
new file mode 100644
index 
0000000000000000000000000000000000000000..3526d8ce6414b747820dedfa12f337a685f3e24d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-4.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-require-effective-target aarch64_sve_hw } */
+/* { dg-options "-O3 -march=armv8-a+sve -mautovec-preference=sve-only 
-fdump-tree-vect-details" }*/
+
+long long p[128];
+
+bool __attribute__((noipa))
+fand (int n)
+{
+  bool r = true;
+  for (int i = 0; i < n; ++i)
+    r &= (p[i] != 0);
+  return r;
+}
+
+bool __attribute__((noipa))
+fior (int n)
+{
+  bool r = false;
+  for (int i = 0; i < n; ++i)
+    r |= (p[i] != 0);
+  return r;
+}
+
+int main()
+{
+  __builtin_memset (p, 1, sizeof(p));
+
+  for (int n = 0; n < 77; ++n)
+    if (!fand (n))
+      __builtin_abort ();
+
+  p[0] = 0;
+  for (int n = 1; n < 77; ++n)
+    if (fand (n))
+      __builtin_abort ();
+
+  __builtin_memset (p, 0, sizeof(p));
+
+  for (int n = 0; n < 77; ++n)
+    if (fior (n))
+      __builtin_abort ();
+
+  p[0] = 1;
+  for (int n = 1; n < 77; ++n)
+    if (!fior (n))
+      __builtin_abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 2 "vect" } } 
*/
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-5.c 
b/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-5.c
new file mode 100644
index 
0000000000000000000000000000000000000000..b6477af8e13e2bd1de3345fddad7e94a432dc72c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-5.c
@@ -0,0 +1,50 @@
+/* { dg-do run } */
+/* { dg-require-effective-target aarch64_sve_hw } */
+/* { dg-options "-O3 -march=armv8-a+sve -mautovec-preference=sve-only 
-fdump-tree-vect-details" }*/
+
+char p[128];
+
+bool __attribute__((noipa))
+fxort (int n)
+{
+  bool r = true;
+  for (int i = 0; i < n; ++i)
+    r ^= (p[i] != 0);
+  return r;
+}
+
+bool __attribute__((noipa))
+fxorf (int n)
+{
+  bool r = false;
+  for (int i = 0; i < n; ++i)
+    r ^= (p[i] != 0);
+  return r;
+}
+
+int main()
+{
+  __builtin_memset (p, 1, sizeof(p));
+
+  for (int n = 0; n < 77; ++n)
+    if (fxort (n) != !(n & 1))
+      __builtin_abort ();
+
+  for (int n = 0; n < 77; ++n)
+    if (fxorf (n) != (n & 1))
+      __builtin_abort ();
+
+  __builtin_memset (p, 0, sizeof(p));
+
+  for (int n = 0; n < 77; ++n)
+    if (!fxort (n))
+      __builtin_abort ();
+
+  for (int n = 0; n < 77; ++n)
+    if (fxorf (n))
+      __builtin_abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 2 "vect" } } 
*/
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-6.c 
b/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-6.c
new file mode 100644
index 
0000000000000000000000000000000000000000..7333aa4b32f97eeec67cb4502ed318ff2b30e26b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-6.c
@@ -0,0 +1,50 @@
+/* { dg-do run } */
+/* { dg-require-effective-target aarch64_sve_hw } */
+/* { dg-options "-O3 -march=armv8-a+sve -mautovec-preference=sve-only 
-fdump-tree-vect-details" }*/
+
+short p[128];
+
+bool __attribute__((noipa))
+fxort (int n)
+{
+  bool r = true;
+  for (int i = 0; i < n; ++i)
+    r ^= (p[i] != 0);
+  return r;
+}
+
+bool __attribute__((noipa))
+fxorf (int n)
+{
+  bool r = false;
+  for (int i = 0; i < n; ++i)
+    r ^= (p[i] != 0);
+  return r;
+}
+
+int main()
+{
+  __builtin_memset (p, 1, sizeof(p));
+
+  for (int n = 0; n < 77; ++n)
+    if (fxort (n) != !(n & 1))
+      __builtin_abort ();
+
+  for (int n = 0; n < 77; ++n)
+    if (fxorf (n) != (n & 1))
+      __builtin_abort ();
+
+  __builtin_memset (p, 0, sizeof(p));
+
+  for (int n = 0; n < 77; ++n)
+    if (!fxort (n))
+      __builtin_abort ();
+
+  for (int n = 0; n < 77; ++n)
+    if (fxorf (n))
+      __builtin_abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 2 "vect" } } 
*/
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-7.c 
b/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-7.c
new file mode 100644
index 
0000000000000000000000000000000000000000..a28ee165f84139468c790becc5cae71603408aa4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-7.c
@@ -0,0 +1,50 @@
+/* { dg-do run } */
+/* { dg-require-effective-target aarch64_sve_hw } */
+/* { dg-options "-O3 -march=armv8-a+sve -mautovec-preference=sve-only 
-fdump-tree-vect-details" }*/
+
+int p[128];
+
+bool __attribute__((noipa))
+fxort (int n)
+{
+  bool r = true;
+  for (int i = 0; i < n; ++i)
+    r ^= (p[i] != 0);
+  return r;
+}
+
+bool __attribute__((noipa))
+fxorf (int n)
+{
+  bool r = false;
+  for (int i = 0; i < n; ++i)
+    r ^= (p[i] != 0);
+  return r;
+}
+
+int main()
+{
+  __builtin_memset (p, 1, sizeof(p));
+
+  for (int n = 0; n < 77; ++n)
+    if (fxort (n) != !(n & 1))
+      __builtin_abort ();
+
+  for (int n = 0; n < 77; ++n)
+    if (fxorf (n) != (n & 1))
+      __builtin_abort ();
+
+  __builtin_memset (p, 0, sizeof(p));
+
+  for (int n = 0; n < 77; ++n)
+    if (!fxort (n))
+      __builtin_abort ();
+
+  for (int n = 0; n < 77; ++n)
+    if (fxorf (n))
+      __builtin_abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 2 "vect" } } 
*/
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-8.c 
b/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-8.c
new file mode 100644
index 
0000000000000000000000000000000000000000..71695b5b43fcdf421e9bfb9555b77847049be0d6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-8.c
@@ -0,0 +1,50 @@
+/* { dg-do run } */
+/* { dg-require-effective-target aarch64_sve_hw } */
+/* { dg-options "-O3 -march=armv8-a+sve -mautovec-preference=sve-only 
-fdump-tree-vect-details" }*/
+
+long long p[128];
+
+bool __attribute__((noipa))
+fxort (int n)
+{
+  bool r = true;
+  for (int i = 0; i < n; ++i)
+    r ^= (p[i] != 0);
+  return r;
+}
+
+bool __attribute__((noipa))
+fxorf (int n)
+{
+  bool r = false;
+  for (int i = 0; i < n; ++i)
+    r ^= (p[i] != 0);
+  return r;
+}
+
+int main()
+{
+  __builtin_memset (p, 1, sizeof(p));
+
+  for (int n = 0; n < 77; ++n)
+    if (fxort (n) != !(n & 1))
+      __builtin_abort ();
+
+  for (int n = 0; n < 77; ++n)
+    if (fxorf (n) != (n & 1))
+      __builtin_abort ();
+
+  __builtin_memset (p, 0, sizeof(p));
+
+  for (int n = 0; n < 77; ++n)
+    if (!fxort (n))
+      __builtin_abort ();
+
+  for (int n = 0; n < 77; ++n)
+    if (fxorf (n))
+      __builtin_abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 2 "vect" { 
target { vect_int && vect_condition } } } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-9.c 
b/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-9.c
new file mode 100644
index 
0000000000000000000000000000000000000000..ebccb8de9cf9c15068f9fa77b10f5c5c2648c26c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-9.c
@@ -0,0 +1,59 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=armv8-a+sve -mautovec-preference=sve-only 
-fno-schedule-insns -fno-reorder-blocks -fno-schedule-insns2 
-fdump-tree-vect-details" }*/
+/* { dg-final { check-function-bodies "**" "" } } */
+
+char p[128];
+
+/*
+** fand:
+**     ...
+**     ptrue   p[0-9]+.s, all
+**     nots    p[0-9]+.b, p[0-9]+/z, p[0-9]+.b
+**     cset    w[0-9]+, none
+**     and     w[0-9]+, w[0-9]+, w[0-9]+
+**     ...
+*/
+bool __attribute__((noipa))
+fand (int n)
+{
+  bool r = true;
+  for (int i = 0; i < n; ++i)
+    r &= (p[i] != 0);
+  return r;
+}
+
+/* 
+** fior:
+**     ...
+**     ptest   p[0-9]+, p[0-9]+.b
+**     cset    w[0-9]+, any
+**     orr     w[0-9]+, w[0-9]+, w[0-9]+
+**     ...
+*/
+bool __attribute__((noipa))
+fior (int n)
+{
+  bool r = false;
+  for (int i = 0; i < n; ++i)
+    r |= (p[i] != 0);
+  return r;
+}
+
+/* 
+** fxor:
+**     ...
+**     cntp    x[0-9]+, p[0-9]+, p[0-9]+.h
+**     and     w[0-9]+, w[0-9]+, 1
+**     eor     w[0-9]+, w[0-9]+, w[0-9]+
+**     ...
+*/ 
+bool __attribute__((noipa))
+fxor (int n)
+{
+  bool r = false;
+  for (int i = 0; i < n; ++i)
+    r ^= (p[i] != 0);
+  return r;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 3 "vect" } } 
*/


-- 
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index 550ff0a3cde65bfe6f6e680c4d490fa027f3a98c..8126eb5a48d85e0399a5d72233778bd1b1151c51 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -135,6 +135,7 @@
 ;; ---- [INT,FP] Conditional reductions
 ;; ---- [INT] Tree reductions
 ;; ---- [FP] Tree reductions
+;; ---- [Predicate] Tree reductions
 ;; ---- [FP] Left-to-right reductions
 ;;
 ;; == Permutes
@@ -9887,6 +9888,100 @@ (define_insn "@aarch64_pred_reduc_<optab>_<mode>"
   [(set_attr "sve_type" "sve_fp_reduc")]
 )
 
+;; -------------------------------------------------------------------------
+;; ---- [Predicate] Tree reductions
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - IORV
+;; - XORV
+;; - ANDV
+;; -------------------------------------------------------------------------
+
+;; Unpredicated predicate AND tree reductions.
+;; Invert the predicate and issue a ptrue on the inverted predicate and check
+;; that the Zero flag is set.
+;;
+;; ptrue   p3.b, all
+;; nots    p3.b, p3/z, p0.b
+;; cset    w0, none
+;;
+(define_expand "reduc_sbool_and_scal_<mode>"
+  [(set (match_operand:QI 0 "register_operand")
+	(unspec:QI [(match_operand:PRED_ALL 1 "register_operand")]
+		    UNSPEC_ANDV))]
+  "TARGET_SVE"
+  {
+    rtx ptrue = force_reg (VNx16BImode, aarch64_ptrue_all (<data_bytes>));
+    rtx cast_ptrue = gen_lowpart (<MODE>mode, ptrue);
+    rtx tmp = gen_reg_rtx (<MODE>mode);
+    emit_insn (gen_aarch64_pred_one_cmpl_z (<MODE>mode, tmp, cast_ptrue,
+					    operands[1]));
+    emit_insn (
+      gen_aarch64_ptest<mode> (ptrue, cast_ptrue,
+			       gen_int_mode (SVE_KNOWN_PTRUE, SImode),
+			       tmp));
+    rtx cc_reg = gen_rtx_REG (CC_NZCmode, CC_REGNUM);
+    rtx cmp = gen_rtx_fmt_ee (EQ, SImode, cc_reg, const0_rtx);
+    rtx tmp2 = gen_reg_rtx (SImode);
+    emit_insn (gen_aarch64_cstoresi (tmp2, cmp, cc_reg));
+    emit_move_insn (operands[0], gen_lowpart (QImode, tmp2));
+    DONE;
+  }
+)
+
+;; Unpredicated predicate IOR tree reductions.
+;; We need to make sure the results are in the CC flags, so execute a ptest
+;; on the same predicate.
+;;
+;;   ptest   p0, p0.b
+;;   cset    w0, any
+;;
+(define_expand "reduc_sbool_ior_scal_<mode>"
+  [(set (match_operand:QI 0 "register_operand")
+	(unspec:QI [(match_operand:PRED_ALL 1 "register_operand")]
+		    UNSPEC_IORV))]
+  "TARGET_SVE"
+  {
+    rtx ptrue = lowpart_subreg (VNx16BImode, operands[1], <MODE>mode);
+    emit_insn (
+      gen_aarch64_ptest<mode> (ptrue, operands[1],
+			       gen_int_mode (SVE_MAYBE_NOT_PTRUE, SImode),
+			       operands[1]));
+    rtx cc_reg = gen_rtx_REG (CC_NZCmode, CC_REGNUM);
+    rtx cmp = gen_rtx_fmt_ee (NE, SImode, cc_reg, const0_rtx);
+    rtx tmp = gen_reg_rtx (SImode);
+    emit_insn (gen_aarch64_cstoresi (tmp, cmp, cc_reg));
+    emit_move_insn (operands[0], gen_lowpart (QImode, tmp));
+    DONE;
+  }
+)
+
+;; Unpredicated predicate XOR tree reductions.
+;; Check to see if the number of active lanes in the predicates is a multiple
+;; of 2.  This generates:
+;;
+;;   cntp    x0, p0, p0.b
+;;   and     w0, w0, 1
+;;
+(define_expand "reduc_sbool_xor_scal_<mode>"
+  [(set (match_dup 2)
+	(zero_extend:DI
+	  (unspec:SI [(match_dup 1)
+		      (const_int SVE_MAYBE_NOT_PTRUE)
+		      (match_operand:PRED_ALL 1 "register_operand")]
+		     UNSPEC_CNTP)))
+    (set (match_dup 4)
+	 (and:DI (match_dup 2)
+		 (const_int 1)))
+    (set (match_operand:QI 0 "register_operand")
+	 (subreg:QI (match_dup 4) 0))]
+  "TARGET_SVE"
+  {
+    operands[2] = gen_reg_rtx (DImode);
+    operands[4] = gen_reg_rtx (DImode);
+  }
+)
+
 ;; -------------------------------------------------------------------------
 ;; ---- [FP] Left-to-right reductions
 ;; -------------------------------------------------------------------------
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-1.c b/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-1.c
new file mode 100644
index 0000000000000000000000000000000000000000..177a7ddeeb0c9d5774f43846466164473543eaf5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-1.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-require-effective-target aarch64_sve_hw } */
+/* { dg-options "-O3 -march=armv8-a+sve -mautovec-preference=sve-only -fdump-tree-vect-details" }*/
+
+char p[128];
+
+bool __attribute__((noipa))
+fand (int n)
+{
+  bool r = true;
+  for (int i = 0; i < n; ++i)
+    r &= (p[i] != 0);
+  return r;
+}
+
+bool __attribute__((noipa))
+fior (int n)
+{
+  bool r = false;
+  for (int i = 0; i < n; ++i)
+    r |= (p[i] != 0);
+  return r;
+}
+
+int main()
+{
+  __builtin_memset (p, 1, sizeof(p));
+
+  for (int n = 0; n < 77; ++n)
+    if (!fand (n))
+      __builtin_abort ();
+
+  p[0] = 0;
+  for (int n = 1; n < 77; ++n)
+    if (fand (n))
+      __builtin_abort ();
+
+  __builtin_memset (p, 0, sizeof(p));
+
+  for (int n = 0; n < 77; ++n)
+    if (fior (n))
+      __builtin_abort ();
+
+  p[0] = 1;
+  for (int n = 1; n < 77; ++n)
+    if (!fior (n))
+      __builtin_abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 2 "vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-2.c b/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-2.c
new file mode 100644
index 0000000000000000000000000000000000000000..dd6e3939175f126de133aad2f8ca19be82f34769
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-2.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-require-effective-target aarch64_sve_hw } */
+/* { dg-options "-O3 -march=armv8-a+sve -mautovec-preference=sve-only -fdump-tree-vect-details" }*/
+
+short p[128];
+
+bool __attribute__((noipa))
+fand (int n)
+{
+  bool r = true;
+  for (int i = 0; i < n; ++i)
+    r &= (p[i] != 0);
+  return r;
+}
+
+bool __attribute__((noipa))
+fior (int n)
+{
+  bool r = false;
+  for (int i = 0; i < n; ++i)
+    r |= (p[i] != 0);
+  return r;
+}
+
+int main()
+{
+  __builtin_memset (p, 1, sizeof(p));
+
+  for (int n = 0; n < 77; ++n)
+    if (!fand (n))
+      __builtin_abort ();
+
+  p[0] = 0;
+  for (int n = 1; n < 77; ++n)
+    if (fand (n))
+      __builtin_abort ();
+
+  __builtin_memset (p, 0, sizeof(p));
+
+  for (int n = 0; n < 77; ++n)
+    if (fior (n))
+      __builtin_abort ();
+
+  p[0] = 1;
+  for (int n = 1; n < 77; ++n)
+    if (!fior (n))
+      __builtin_abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 2 "vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-3.c b/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-3.c
new file mode 100644
index 0000000000000000000000000000000000000000..cae2ac8c7ee2c0e9d22a9428692c25ad195bd074
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-3.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-require-effective-target aarch64_sve_hw } */
+/* { dg-options "-O3 -march=armv8-a+sve -mautovec-preference=sve-only -fdump-tree-vect-details" }*/
+
+int p[128];
+
+bool __attribute__((noipa))
+fand (int n)
+{
+  bool r = true;
+  for (int i = 0; i < n; ++i)
+    r &= (p[i] != 0);
+  return r;
+}
+
+bool __attribute__((noipa))
+fior (int n)
+{
+  bool r = false;
+  for (int i = 0; i < n; ++i)
+    r |= (p[i] != 0);
+  return r;
+}
+
+int main()
+{
+  __builtin_memset (p, 1, sizeof(p));
+
+  for (int n = 0; n < 77; ++n)
+    if (!fand (n))
+      __builtin_abort ();
+
+  p[0] = 0;
+  for (int n = 1; n < 77; ++n)
+    if (fand (n))
+      __builtin_abort ();
+
+  __builtin_memset (p, 0, sizeof(p));
+
+  for (int n = 0; n < 77; ++n)
+    if (fior (n))
+      __builtin_abort ();
+
+  p[0] = 1;
+  for (int n = 1; n < 77; ++n)
+    if (!fior (n))
+      __builtin_abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 2 "vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-4.c b/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-4.c
new file mode 100644
index 0000000000000000000000000000000000000000..3526d8ce6414b747820dedfa12f337a685f3e24d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-4.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-require-effective-target aarch64_sve_hw } */
+/* { dg-options "-O3 -march=armv8-a+sve -mautovec-preference=sve-only -fdump-tree-vect-details" }*/
+
+long long p[128];
+
+bool __attribute__((noipa))
+fand (int n)
+{
+  bool r = true;
+  for (int i = 0; i < n; ++i)
+    r &= (p[i] != 0);
+  return r;
+}
+
+bool __attribute__((noipa))
+fior (int n)
+{
+  bool r = false;
+  for (int i = 0; i < n; ++i)
+    r |= (p[i] != 0);
+  return r;
+}
+
+int main()
+{
+  __builtin_memset (p, 1, sizeof(p));
+
+  for (int n = 0; n < 77; ++n)
+    if (!fand (n))
+      __builtin_abort ();
+
+  p[0] = 0;
+  for (int n = 1; n < 77; ++n)
+    if (fand (n))
+      __builtin_abort ();
+
+  __builtin_memset (p, 0, sizeof(p));
+
+  for (int n = 0; n < 77; ++n)
+    if (fior (n))
+      __builtin_abort ();
+
+  p[0] = 1;
+  for (int n = 1; n < 77; ++n)
+    if (!fior (n))
+      __builtin_abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 2 "vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-5.c b/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-5.c
new file mode 100644
index 0000000000000000000000000000000000000000..b6477af8e13e2bd1de3345fddad7e94a432dc72c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-5.c
@@ -0,0 +1,50 @@
+/* { dg-do run } */
+/* { dg-require-effective-target aarch64_sve_hw } */
+/* { dg-options "-O3 -march=armv8-a+sve -mautovec-preference=sve-only -fdump-tree-vect-details" }*/
+
+char p[128];
+
+bool __attribute__((noipa))
+fxort (int n)
+{
+  bool r = true;
+  for (int i = 0; i < n; ++i)
+    r ^= (p[i] != 0);
+  return r;
+}
+
+bool __attribute__((noipa))
+fxorf (int n)
+{
+  bool r = false;
+  for (int i = 0; i < n; ++i)
+    r ^= (p[i] != 0);
+  return r;
+}
+
+int main()
+{
+  __builtin_memset (p, 1, sizeof(p));
+
+  for (int n = 0; n < 77; ++n)
+    if (fxort (n) != !(n & 1))
+      __builtin_abort ();
+
+  for (int n = 0; n < 77; ++n)
+    if (fxorf (n) != (n & 1))
+      __builtin_abort ();
+
+  __builtin_memset (p, 0, sizeof(p));
+
+  for (int n = 0; n < 77; ++n)
+    if (!fxort (n))
+      __builtin_abort ();
+
+  for (int n = 0; n < 77; ++n)
+    if (fxorf (n))
+      __builtin_abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 2 "vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-6.c b/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-6.c
new file mode 100644
index 0000000000000000000000000000000000000000..7333aa4b32f97eeec67cb4502ed318ff2b30e26b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-6.c
@@ -0,0 +1,50 @@
+/* { dg-do run } */
+/* { dg-require-effective-target aarch64_sve_hw } */
+/* { dg-options "-O3 -march=armv8-a+sve -mautovec-preference=sve-only -fdump-tree-vect-details" }*/
+
+short p[128];
+
+bool __attribute__((noipa))
+fxort (int n)
+{
+  bool r = true;
+  for (int i = 0; i < n; ++i)
+    r ^= (p[i] != 0);
+  return r;
+}
+
+bool __attribute__((noipa))
+fxorf (int n)
+{
+  bool r = false;
+  for (int i = 0; i < n; ++i)
+    r ^= (p[i] != 0);
+  return r;
+}
+
+int main()
+{
+  __builtin_memset (p, 1, sizeof(p));
+
+  for (int n = 0; n < 77; ++n)
+    if (fxort (n) != !(n & 1))
+      __builtin_abort ();
+
+  for (int n = 0; n < 77; ++n)
+    if (fxorf (n) != (n & 1))
+      __builtin_abort ();
+
+  __builtin_memset (p, 0, sizeof(p));
+
+  for (int n = 0; n < 77; ++n)
+    if (!fxort (n))
+      __builtin_abort ();
+
+  for (int n = 0; n < 77; ++n)
+    if (fxorf (n))
+      __builtin_abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 2 "vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-7.c b/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-7.c
new file mode 100644
index 0000000000000000000000000000000000000000..a28ee165f84139468c790becc5cae71603408aa4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-7.c
@@ -0,0 +1,50 @@
+/* { dg-do run } */
+/* { dg-require-effective-target aarch64_sve_hw } */
+/* { dg-options "-O3 -march=armv8-a+sve -mautovec-preference=sve-only -fdump-tree-vect-details" }*/
+
+int p[128];
+
+bool __attribute__((noipa))
+fxort (int n)
+{
+  bool r = true;
+  for (int i = 0; i < n; ++i)
+    r ^= (p[i] != 0);
+  return r;
+}
+
+bool __attribute__((noipa))
+fxorf (int n)
+{
+  bool r = false;
+  for (int i = 0; i < n; ++i)
+    r ^= (p[i] != 0);
+  return r;
+}
+
+int main()
+{
+  __builtin_memset (p, 1, sizeof(p));
+
+  for (int n = 0; n < 77; ++n)
+    if (fxort (n) != !(n & 1))
+      __builtin_abort ();
+
+  for (int n = 0; n < 77; ++n)
+    if (fxorf (n) != (n & 1))
+      __builtin_abort ();
+
+  __builtin_memset (p, 0, sizeof(p));
+
+  for (int n = 0; n < 77; ++n)
+    if (!fxort (n))
+      __builtin_abort ();
+
+  for (int n = 0; n < 77; ++n)
+    if (fxorf (n))
+      __builtin_abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 2 "vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-8.c b/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-8.c
new file mode 100644
index 0000000000000000000000000000000000000000..71695b5b43fcdf421e9bfb9555b77847049be0d6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-8.c
@@ -0,0 +1,50 @@
+/* { dg-do run } */
+/* { dg-require-effective-target aarch64_sve_hw } */
+/* { dg-options "-O3 -march=armv8-a+sve -mautovec-preference=sve-only -fdump-tree-vect-details" }*/
+
+long long p[128];
+
+bool __attribute__((noipa))
+fxort (int n)
+{
+  bool r = true;
+  for (int i = 0; i < n; ++i)
+    r ^= (p[i] != 0);
+  return r;
+}
+
+bool __attribute__((noipa))
+fxorf (int n)
+{
+  bool r = false;
+  for (int i = 0; i < n; ++i)
+    r ^= (p[i] != 0);
+  return r;
+}
+
+int main()
+{
+  __builtin_memset (p, 1, sizeof(p));
+
+  for (int n = 0; n < 77; ++n)
+    if (fxort (n) != !(n & 1))
+      __builtin_abort ();
+
+  for (int n = 0; n < 77; ++n)
+    if (fxorf (n) != (n & 1))
+      __builtin_abort ();
+
+  __builtin_memset (p, 0, sizeof(p));
+
+  for (int n = 0; n < 77; ++n)
+    if (!fxort (n))
+      __builtin_abort ();
+
+  for (int n = 0; n < 77; ++n)
+    if (fxorf (n))
+      __builtin_abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 2 "vect" { target { vect_int && vect_condition } } } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-9.c b/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-9.c
new file mode 100644
index 0000000000000000000000000000000000000000..ebccb8de9cf9c15068f9fa77b10f5c5c2648c26c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-9.c
@@ -0,0 +1,59 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=armv8-a+sve -mautovec-preference=sve-only -fno-schedule-insns -fno-reorder-blocks -fno-schedule-insns2 -fdump-tree-vect-details" }*/
+/* { dg-final { check-function-bodies "**" "" } } */
+
+char p[128];
+
+/*
+** fand:
+** 	...
+** 	ptrue	p[0-9]+.s, all
+** 	nots	p[0-9]+.b, p[0-9]+/z, p[0-9]+.b
+** 	cset	w[0-9]+, none
+** 	and	w[0-9]+, w[0-9]+, w[0-9]+
+** 	...
+*/
+bool __attribute__((noipa))
+fand (int n)
+{
+  bool r = true;
+  for (int i = 0; i < n; ++i)
+    r &= (p[i] != 0);
+  return r;
+}
+
+/* 
+** fior:
+** 	...
+** 	ptest	p[0-9]+, p[0-9]+.b
+** 	cset	w[0-9]+, any
+** 	orr	w[0-9]+, w[0-9]+, w[0-9]+
+** 	...
+*/
+bool __attribute__((noipa))
+fior (int n)
+{
+  bool r = false;
+  for (int i = 0; i < n; ++i)
+    r |= (p[i] != 0);
+  return r;
+}
+
+/* 
+** fxor:
+** 	...
+** 	cntp	x[0-9]+, p[0-9]+, p[0-9]+.h
+** 	and	w[0-9]+, w[0-9]+, 1
+** 	eor	w[0-9]+, w[0-9]+, w[0-9]+
+** 	...
+*/ 
+bool __attribute__((noipa))
+fxor (int n)
+{
+  bool r = false;
+  for (int i = 0; i < n; ++i)
+    r ^= (p[i] != 0);
+  return r;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 3 "vect" } } */

Reply via email to