https://gcc.gnu.org/g:678ec7143b85e445c8c8c2258d852a09a0df23cb
commit r16-4561-g678ec7143b85e445c8c8c2258d852a09a0df23cb
Author: Tamar Christina <[email protected]>
Date:   Wed Oct 22 10:52:43 2025 +0100

    AArch64: Add support for boolean reductions for Adv. SIMD using SVE

    When doing boolean reductions for Adv. SIMD vectors and SVE is available,
    we can use SVE instructions instead of Adv. SIMD ones to do the reduction.

    For instance, OR-reductions are

        umaxp   v3.4s, v3.4s, v3.4s
        fmov    x1, d3
        cmp     x1, 0
        cset    w0, ne

    and with SVE we generate:

        ptrue   p1.b, vl16
        cmpne   p1.b, p1/z, z3.b, #0
        cset    w0, any

    where the ptrue is normally executed much earlier, so it's not a bottleneck
    for the compare.

    For the remaining codegen see test vect-reduc-bool-18.c.

    gcc/ChangeLog:

        * config/aarch64/aarch64-simd.md (reduc_sbool_and_scal_<mode>,
        reduc_sbool_ior_scal_<mode>, reduc_sbool_xor_scal_<mode>): Use SVE
        if available.
        * config/aarch64/aarch64-sve.md (*cmp<cmp_op><mode>_ptest): Rename ...
        (@aarch64_pred_cmp<cmp_op><mode>_ptest): ... to this.
        (reduc_sbool_xor_scal_<mode>): Rename ...
        (@reduc_sbool_xor_scal_<mode>): ... to this.

    gcc/testsuite/ChangeLog:

        * gcc.target/aarch64/sve/vect-reduc-bool-10.c: New test.
        * gcc.target/aarch64/sve/vect-reduc-bool-11.c: New test.
        * gcc.target/aarch64/sve/vect-reduc-bool-12.c: New test.
        * gcc.target/aarch64/sve/vect-reduc-bool-13.c: New test.
        * gcc.target/aarch64/sve/vect-reduc-bool-14.c: New test.
        * gcc.target/aarch64/sve/vect-reduc-bool-15.c: New test.
        * gcc.target/aarch64/sve/vect-reduc-bool-16.c: New test.
        * gcc.target/aarch64/sve/vect-reduc-bool-17.c: New test.
        * gcc.target/aarch64/sve/vect-reduc-bool-18.c: New test.

Diff:
---
 gcc/config/aarch64/aarch64-simd.md              | 83 ++++++++++++++++++++++
 gcc/config/aarch64/aarch64-sve.md               |  4 +-
 .../gcc.target/aarch64/sve/vect-reduc-bool-10.c | 52 ++++++++++++++
 .../gcc.target/aarch64/sve/vect-reduc-bool-11.c | 52 ++++++++++++++
 .../gcc.target/aarch64/sve/vect-reduc-bool-12.c | 52 ++++++++++++++
 .../gcc.target/aarch64/sve/vect-reduc-bool-13.c | 52 ++++++++++++++
 .../gcc.target/aarch64/sve/vect-reduc-bool-14.c | 50 +++++++++++++
 .../gcc.target/aarch64/sve/vect-reduc-bool-15.c | 50 +++++++++++++
 .../gcc.target/aarch64/sve/vect-reduc-bool-16.c | 50 +++++++++++++
 .../gcc.target/aarch64/sve/vect-reduc-bool-17.c | 50 +++++++++++++
 .../gcc.target/aarch64/sve/vect-reduc-bool-18.c | 60 ++++++++++++++++
 11 files changed, 553 insertions(+), 2 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 648a42f7d0f7..a121a18f9a09 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -3477,12 +3477,41 @@
 ;;	cmn	x1, #1
 ;;	cset	w0, eq
 ;;
+;; or with SVE enabled
+;;
+;;	ptrue	p1.b, vl16
+;;	cmpeq	p0.b, p1/z, z1.b, #0
+;;	cset	w0, none
+;;
 (define_expand "reduc_sbool_and_scal_<mode>"
   [(set (match_operand:QI 0 "register_operand")
	(unspec:QI [(match_operand:VALLI 1 "register_operand")]
		   UNSPEC_ANDV))]
   "TARGET_SIMD"
 {
+  if (TARGET_SVE)
+    {
+      machine_mode full_mode = aarch64_full_sve_mode (<VEL>mode).require ();
+      rtx in = force_lowpart_subreg (full_mode, operands[1], <MODE>mode);
+      unsigned lanes
+	= exact_div (GET_MODE_BITSIZE (<MODE>mode), 8).to_constant ();
+      machine_mode pred_mode = aarch64_sve_pred_mode (full_mode);
+      rtx pred_res = gen_reg_rtx (pred_mode);
+      rtx gp = aarch64_ptrue_reg (VNx16BImode, lanes);
+      rtx cast_gp = lowpart_subreg (pred_mode, gp, VNx16BImode);
+      rtx gp_flag = gen_int_mode (SVE_MAYBE_NOT_PTRUE, SImode);
+      emit_insn (
+	gen_aarch64_pred_cmp_ptest (EQ, full_mode, pred_res, gp, in,
+				    CONST0_RTX (full_mode), cast_gp,
+				    gp_flag, cast_gp, gp_flag));
+      rtx cc_reg = gen_rtx_REG (CC_NZCmode, CC_REGNUM);
+      rtx cmp = gen_rtx_fmt_ee (EQ, SImode, cc_reg, const0_rtx);
+      rtx tmp2 = gen_reg_rtx (SImode);
+      emit_insn (gen_aarch64_cstoresi (tmp2, cmp, cc_reg));
+      emit_move_insn (operands[0], gen_lowpart (QImode, tmp2));
+      DONE;
+    }
+
   rtx tmp = operands[1];
   /* 128-bit vectors need to be compressed to 64-bits first.  */
   if (known_eq (128, GET_MODE_BITSIZE (<MODE>mode)))
@@ -3511,12 +3540,41 @@
 ;;	cmp	x1, 0
 ;;	cset	w0, ne
 ;;
+;; or with SVE enabled
+;;
+;;	ptrue	p1.b, vl16
+;;	cmpne	p0.b, p1/z, z1.b, #0
+;;	cset	w0, any
+;;
 (define_expand "reduc_sbool_ior_scal_<mode>"
   [(set (match_operand:QI 0 "register_operand")
	(unspec:QI [(match_operand:VALLI 1 "register_operand")]
		   UNSPEC_IORV))]
   "TARGET_SIMD"
 {
+  if (TARGET_SVE)
+    {
+      machine_mode full_mode = aarch64_full_sve_mode (<VEL>mode).require ();
+      rtx in = force_lowpart_subreg (full_mode, operands[1], <MODE>mode);
+      unsigned lanes
+	= exact_div (GET_MODE_BITSIZE (<MODE>mode), 8).to_constant ();
+      machine_mode pred_mode = aarch64_sve_pred_mode (full_mode);
+      rtx pred_res = gen_reg_rtx (pred_mode);
+      rtx gp = aarch64_ptrue_reg (VNx16BImode, lanes);
+      rtx cast_gp = lowpart_subreg (pred_mode, gp, VNx16BImode);
+      rtx gp_flag = gen_int_mode (SVE_MAYBE_NOT_PTRUE, SImode);
+      emit_insn (
+	gen_aarch64_pred_cmp_ptest (NE, full_mode, pred_res, gp, in,
+				    CONST0_RTX (full_mode), cast_gp,
+				    gp_flag, cast_gp, gp_flag));
+      rtx cc_reg = gen_rtx_REG (CC_NZCmode, CC_REGNUM);
+      rtx cmp = gen_rtx_fmt_ee (NE, SImode, cc_reg, const0_rtx);
+      rtx tmp2 = gen_reg_rtx (SImode);
+      emit_insn (gen_aarch64_cstoresi (tmp2, cmp, cc_reg));
+      emit_move_insn (operands[0], gen_lowpart (QImode, tmp2));
+      DONE;
+    }
+
   rtx tmp = operands[1];
   /* 128-bit vectors need to be compressed to 64-bits first.  */
   if (known_eq (128, GET_MODE_BITSIZE (<MODE>mode)))
@@ -3547,12 +3605,37 @@
 ;;	fmov	w1, s3
 ;;	and	w0, w1, 1
 ;;
+;; or with SVE enabled
+;;
+;;	ptrue	p1.b, vl16
+;;	cmpne	p0.b, p1/z, z1.b, #0
+;;	cntp	x1, p0, p0.b
+;;	and	w0, w1, 1
+;;
 (define_expand "reduc_sbool_xor_scal_<mode>"
   [(set (match_operand:QI 0 "register_operand")
	(unspec:QI [(match_operand:VALLI 1 "register_operand")]
		   UNSPEC_XORV))]
   "TARGET_SIMD"
 {
+  if (TARGET_SVE)
+    {
+      machine_mode full_mode = aarch64_full_sve_mode (<VEL>mode).require ();
+      rtx in = force_lowpart_subreg (full_mode, operands[1], <MODE>mode);
+      unsigned lanes
+	= exact_div (GET_MODE_BITSIZE (<MODE>mode), 8).to_constant ();
+      machine_mode pred_mode = aarch64_sve_pred_mode (full_mode);
+      rtx pred_res = gen_reg_rtx (pred_mode);
+      rtx gp = aarch64_ptrue_reg (VNx16BImode, lanes);
+      rtx cast_gp = lowpart_subreg (pred_mode, gp, VNx16BImode);
+      rtx gp_flag = gen_int_mode (SVE_MAYBE_NOT_PTRUE, SImode);
+      emit_insn (
+	gen_aarch64_pred_cmp (NE, full_mode, pred_res, cast_gp, gp_flag, in,
+			      CONST0_RTX (full_mode)));
+      emit_insn (gen_reduc_sbool_xor_scal (pred_mode, operands[0], pred_res));
+      DONE;
+    }
+
   rtx tmp = gen_reg_rtx (<MODE>mode);
   rtx one_reg = force_reg (<MODE>mode, CONST1_RTX (<MODE>mode));
   emit_move_insn (tmp, gen_rtx_AND (<MODE>mode, operands[1], one_reg));
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index 047c16f974ac..f459f63d6bb2 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -8745,7 +8745,7 @@
 
 ;; Predicated integer comparisons in which only the flags result is
 ;; interesting.
-(define_insn_and_rewrite "*cmp<cmp_op><mode>_ptest"
+(define_insn_and_rewrite "@aarch64_pred_cmp<cmp_op><mode>_ptest"
   [(set (reg:CC_NZC CC_REGNUM)
	(unspec:CC_NZC
	  [(match_operand:VNx16BI 1 "register_operand")
@@ -9963,7 +9963,7 @@
 ;;	cntp	x0, p0, p0.b
 ;;	and	w0, w0, 1
 ;;
-(define_expand "reduc_sbool_xor_scal_<mode>"
+(define_expand "@reduc_sbool_xor_scal_<mode>"
   [(set (match_dup 2)
	(zero_extend:DI
	  (unspec:SI [(match_dup 1)
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-10.c b/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-10.c
new file mode 100644
index 000000000000..c0ff50e914b7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-10.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-require-effective-target aarch64_sve_hw } */
+/* { dg-options "-O3 -march=armv8-a+sve -mautovec-preference=asimd-only -fdump-tree-vect-details" }*/
+
+char p[128];
+
+bool __attribute__((noipa))
+fand (int n)
+{
+  bool r = true;
+  for (int i = 0; i < n; ++i)
+    r &= (p[i] != 0);
+  return r;
+}
+
+bool __attribute__((noipa))
+fior (int n)
+{
+  bool r = false;
+  for (int i = 0; i < n; ++i)
+    r |= (p[i] != 0);
+  return r;
+}
+
+int main()
+{
+  __builtin_memset (p, 1, sizeof(p));
+
+  for (int n = 0; n < 77; ++n)
+    if (!fand (n))
+      __builtin_abort ();
+
+  p[0] = 0;
+  for (int n = 1; n < 77; ++n)
+    if (fand (n))
+      __builtin_abort ();
+
+  __builtin_memset (p, 0, sizeof(p));
+
+  for (int n = 0; n < 77; ++n)
+    if (fior (n))
+      __builtin_abort ();
+
+  p[0] = 1;
+  for (int n = 1; n < 77; ++n)
+    if (!fior (n))
+      __builtin_abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 2 "vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-11.c b/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-11.c
new file mode 100644
index 000000000000..3597fc4a456c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-11.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-require-effective-target aarch64_sve_hw } */
+/* { dg-options "-O3 -march=armv8-a+sve -mautovec-preference=asimd-only -fdump-tree-vect-details" }*/
+
+short p[128];
+
+bool __attribute__((noipa))
+fand (int n)
+{
+  bool r = true;
+  for (int i = 0; i < n; ++i)
+    r &= (p[i] != 0);
+  return r;
+}
+
+bool __attribute__((noipa))
+fior (int n)
+{
+  bool r = false;
+  for (int i = 0; i < n; ++i)
+    r |= (p[i] != 0);
+  return r;
+}
+
+int main()
+{
+  __builtin_memset (p, 1, sizeof(p));
+
+  for (int n = 0; n < 77; ++n)
+    if (!fand (n))
+      __builtin_abort ();
+
+  p[0] = 0;
+  for (int n = 1; n < 77; ++n)
+    if (fand (n))
+      __builtin_abort ();
+
+  __builtin_memset (p, 0, sizeof(p));
+
+  for (int n = 0; n < 77; ++n)
+    if (fior (n))
+      __builtin_abort ();
+
+  p[0] = 1;
+  for (int n = 1; n < 77; ++n)
+    if (!fior (n))
+      __builtin_abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 2 "vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-12.c b/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-12.c
new file mode 100644
index 000000000000..b1173627403d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-12.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-require-effective-target aarch64_sve_hw } */
+/* { dg-options "-O3 -march=armv8-a+sve -mautovec-preference=asimd-only -fdump-tree-vect-details" }*/
+
+int p[128];
+
+bool __attribute__((noipa))
+fand (int n)
+{
+  bool r = true;
+  for (int i = 0; i < n; ++i)
+    r &= (p[i] != 0);
+  return r;
+}
+
+bool __attribute__((noipa))
+fior (int n)
+{
+  bool r = false;
+  for (int i = 0; i < n; ++i)
+    r |= (p[i] != 0);
+  return r;
+}
+
+int main()
+{
+  __builtin_memset (p, 1, sizeof(p));
+
+  for (int n = 0; n < 77; ++n)
+    if (!fand (n))
+      __builtin_abort ();
+
+  p[0] = 0;
+  for (int n = 1; n < 77; ++n)
+    if (fand (n))
+      __builtin_abort ();
+
+  __builtin_memset (p, 0, sizeof(p));
+
+  for (int n = 0; n < 77; ++n)
+    if (fior (n))
+      __builtin_abort ();
+
+  p[0] = 1;
+  for (int n = 1; n < 77; ++n)
+    if (!fior (n))
+      __builtin_abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 2 "vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-13.c b/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-13.c
new file mode 100644
index 000000000000..a2b8a7120434
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-13.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-require-effective-target aarch64_sve_hw } */
+/* { dg-options "-O3 -march=armv8-a+sve -mautovec-preference=asimd-only -fdump-tree-vect-details" }*/
+
+long long p[128];
+
+bool __attribute__((noipa))
+fand (int n)
+{
+  bool r = true;
+  for (int i = 0; i < n; ++i)
+    r &= (p[i] != 0);
+  return r;
+}
+
+bool __attribute__((noipa))
+fior (int n)
+{
+  bool r = false;
+  for (int i = 0; i < n; ++i)
+    r |= (p[i] != 0);
+  return r;
+}
+
+int main()
+{
+  __builtin_memset (p, 1, sizeof(p));
+
+  for (int n = 0; n < 77; ++n)
+    if (!fand (n))
+      __builtin_abort ();
+
+  p[0] = 0;
+  for (int n = 1; n < 77; ++n)
+    if (fand (n))
+      __builtin_abort ();
+
+  __builtin_memset (p, 0, sizeof(p));
+
+  for (int n = 0; n < 77; ++n)
+    if (fior (n))
+      __builtin_abort ();
+
+  p[0] = 1;
+  for (int n = 1; n < 77; ++n)
+    if (!fior (n))
+      __builtin_abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 2 "vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-14.c b/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-14.c
new file mode 100644
index 000000000000..c24e13294fe2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-14.c
@@ -0,0 +1,50 @@
+/* { dg-do run } */
+/* { dg-require-effective-target aarch64_sve_hw } */
+/* { dg-options "-O3 -march=armv8-a+sve -mautovec-preference=asimd-only -fdump-tree-vect-details" }*/
+
+char p[128];
+
+bool __attribute__((noipa))
+fxort (int n)
+{
+  bool r = true;
+  for (int i = 0; i < n; ++i)
+    r ^= (p[i] != 0);
+  return r;
+}
+
+bool __attribute__((noipa))
+fxorf (int n)
+{
+  bool r = false;
+  for (int i = 0; i < n; ++i)
+    r ^= (p[i] != 0);
+  return r;
+}
+
+int main()
+{
+  __builtin_memset (p, 1, sizeof(p));
+
+  for (int n = 0; n < 77; ++n)
+    if (fxort (n) != !(n & 1))
+      __builtin_abort ();
+
+  for (int n = 0; n < 77; ++n)
+    if (fxorf (n) != (n & 1))
+      __builtin_abort ();
+
+  __builtin_memset (p, 0, sizeof(p));
+
+  for (int n = 0; n < 77; ++n)
+    if (!fxort (n))
+      __builtin_abort ();
+
+  for (int n = 0; n < 77; ++n)
+    if (fxorf (n))
+      __builtin_abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 2 "vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-15.c b/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-15.c
new file mode 100644
index 000000000000..0233b8ae2338
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-15.c
@@ -0,0 +1,50 @@
+/* { dg-do run } */
+/* { dg-require-effective-target aarch64_sve_hw } */
+/* { dg-options "-O3 -march=armv8-a+sve -mautovec-preference=asimd-only -fdump-tree-vect-details" }*/
+
+short p[128];
+
+bool __attribute__((noipa))
+fxort (int n)
+{
+  bool r = true;
+  for (int i = 0; i < n; ++i)
+    r ^= (p[i] != 0);
+  return r;
+}
+
+bool __attribute__((noipa))
+fxorf (int n)
+{
+  bool r = false;
+  for (int i = 0; i < n; ++i)
+    r ^= (p[i] != 0);
+  return r;
+}
+
+int main()
+{
+  __builtin_memset (p, 1, sizeof(p));
+
+  for (int n = 0; n < 77; ++n)
+    if (fxort (n) != !(n & 1))
+      __builtin_abort ();
+
+  for (int n = 0; n < 77; ++n)
+    if (fxorf (n) != (n & 1))
+      __builtin_abort ();
+
+  __builtin_memset (p, 0, sizeof(p));
+
+  for (int n = 0; n < 77; ++n)
+    if (!fxort (n))
+      __builtin_abort ();
+
+  for (int n = 0; n < 77; ++n)
+    if (fxorf (n))
+      __builtin_abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 2 "vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-16.c b/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-16.c
new file mode 100644
index 000000000000..e731b556424f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-16.c
@@ -0,0 +1,50 @@
+/* { dg-do run } */
+/* { dg-require-effective-target aarch64_sve_hw } */
+/* { dg-options "-O3 -march=armv8-a+sve -mautovec-preference=asimd-only -fdump-tree-vect-details" }*/
+
+int p[128];
+
+bool __attribute__((noipa))
+fxort (int n)
+{
+  bool r = true;
+  for (int i = 0; i < n; ++i)
+    r ^= (p[i] != 0);
+  return r;
+}
+
+bool __attribute__((noipa))
+fxorf (int n)
+{
+  bool r = false;
+  for (int i = 0; i < n; ++i)
+    r ^= (p[i] != 0);
+  return r;
+}
+
+int main()
+{
+  __builtin_memset (p, 1, sizeof(p));
+
+  for (int n = 0; n < 77; ++n)
+    if (fxort (n) != !(n & 1))
+      __builtin_abort ();
+
+  for (int n = 0; n < 77; ++n)
+    if (fxorf (n) != (n & 1))
+      __builtin_abort ();
+
+  __builtin_memset (p, 0, sizeof(p));
+
+  for (int n = 0; n < 77; ++n)
+    if (!fxort (n))
+      __builtin_abort ();
+
+  for (int n = 0; n < 77; ++n)
+    if (fxorf (n))
+      __builtin_abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 2 "vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-17.c b/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-17.c
new file mode 100644
index 000000000000..efbec019bf7a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-17.c
@@ -0,0 +1,50 @@
+/* { dg-do run } */
+/* { dg-require-effective-target aarch64_sve_hw } */
+/* { dg-options "-O3 -march=armv8-a+sve -mautovec-preference=asimd-only -fdump-tree-vect-details" }*/
+
+long long p[128];
+
+bool __attribute__((noipa))
+fxort (int n)
+{
+  bool r = true;
+  for (int i = 0; i < n; ++i)
+    r ^= (p[i] != 0);
+  return r;
+}
+
+bool __attribute__((noipa))
+fxorf (int n)
+{
+  bool r = false;
+  for (int i = 0; i < n; ++i)
+    r ^= (p[i] != 0);
+  return r;
+}
+
+int main()
+{
+  __builtin_memset (p, 1, sizeof(p));
+
+  for (int n = 0; n < 77; ++n)
+    if (fxort (n) != !(n & 1))
+      __builtin_abort ();
+
+  for (int n = 0; n < 77; ++n)
+    if (fxorf (n) != (n & 1))
+      __builtin_abort ();
+
+  __builtin_memset (p, 0, sizeof(p));
+
+  for (int n = 0; n < 77; ++n)
+    if (!fxort (n))
+      __builtin_abort ();
+
+  for (int n = 0; n < 77; ++n)
+    if (fxorf (n))
+      __builtin_abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 2 "vect" { target { vect_int && vect_condition } } } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-18.c b/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-18.c
new file mode 100644
index 000000000000..a47c306e13c6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/vect-reduc-bool-18.c
@@ -0,0 +1,60 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=armv8-a+sve -mautovec-preference=asimd-only -fno-schedule-insns -fno-reorder-blocks -fno-schedule-insns2 -fdump-tree-vect-details" }*/
+/* { dg-final { check-function-bodies "**" "" } } */
+
+char p[128];
+
+/*
+** fand:
+** ...
+**	ptrue	p[0-9]+.b, vl16
+**	cmpeq	p[0-9]+.b, p[0-9]+/z, z[0-9]+.b, #0
+**	cset	w[0-9]+, none
+** ...
+*/
+bool __attribute__((noipa))
+fand (int n)
+{
+  bool r = true;
+  for (int i = 0; i < n; ++i)
+    r &= (p[i] != 0);
+  return r;
+}
+
+/*
+** fior:
+** ...
+**	ptrue	p[0-9]+.b, vl16
+**	cmpne	p[0-9]+.b, p[0-9]+/z, z[0-9]+.b, #0
+**	cset	w[0-9]+, any
+** ...
+*/
+bool __attribute__((noipa))
+fior (int n)
+{
+  bool r = false;
+  for (int i = 0; i < n; ++i)
+    r |= (p[i] != 0);
+  return r;
+}
+
+/*
+** fxor:
+** ...
+**	ptrue	p[0-9]+.b, vl16
+**	cmpne	p[0-9]+.b, p[0-9]+/z, z[0-9]+.b, #0
+**	cntp	x[0-9]+, p[0-9]+, p[0-9]+.b
+**	and	w[0-9]+, w[0-9]+, 1
+** ...
+*/
+bool __attribute__((noipa))
+fxor (int n)
+{
+  bool r = false;
+  for (int i = 0; i < n; ++i)
+    r ^= (p[i] != 0);
+  return r;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 3 "vect" } } */
+
