Hi,
This is a follow-up patch to the previous vcond patches. In the previous ones,
we relied on the combiner to simplify "X = !Y; Z = X ? A : B" into "Z = Y ? B : A".
That works for some cases, but not all of them; for example, the case in
PR69848. The reason could be in the combiner, but more likely it is in the bsl
patterns, which are too complicated to be handled by the combiner. Investigating all
cases pattern by pattern would be tedious, this patch modifies vcond
patterns to explicitly invert comparison code (as well as switch operands)
to avoid the additional NOT instruction. Note that unordered floating point
comparisons are not handled because they would complicate the code; also, NE is
the most common case. The patch further reduces the assembly code in
PR69848 on top of the vcond patches.
Bootstrapped and tested on AArch64. Is it OK?
Thanks,
bin
2016-08-03 Bin Cheng <bin.ch...@arm.com>
PR tree-optimization/69848
* config/aarch64/aarch64-simd.md (vcond<mode><mode>): Invert NE
and switch operands to avoid additional NOT instruction.
(vcond<v_cmp_mixed><mode>): Ditto.
(vcondu<mode><mode>, vcondu<mode><v_cmp_mixed>): Ditto.
gcc/testsuite/ChangeLog
2016-08-03 Bin Cheng <bin.ch...@arm.com>
PR tree-optimization/69848
* gcc.target/aarch64/simd/vcond-ne-bit.c: New test.
diff --git a/gcc/config/aarch64/aarch64-simd.md
b/gcc/config/aarch64/aarch64-simd.md
index dca079f..3fa88be 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -2575,6 +2575,15 @@
rtx mask = gen_reg_rtx (<V_cmp_result>mode);
enum rtx_code code = GET_CODE (operands[3]);
+ /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
+ it as well as switch operands 1/2 in order to avoid the additional
+ NOT instruction. */
+ if (code == NE)
+ {
+ operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
+ operands[4], operands[5]);
+ std::swap (operands[1], operands[2]);
+ }
emit_insn (gen_vec_cmp<mode><v_cmp_result> (mask, operands[3],
operands[4], operands[5]));
emit_insn (gen_vcond_mask_<mode><v_cmp_result> (operands[0], operands[1],
@@ -2596,6 +2605,15 @@
rtx mask = gen_reg_rtx (<V_cmp_result>mode);
enum rtx_code code = GET_CODE (operands[3]);
+ /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
+ it as well as switch operands 1/2 in order to avoid the additional
+ NOT instruction. */
+ if (code == NE)
+ {
+ operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
+ operands[4], operands[5]);
+ std::swap (operands[1], operands[2]);
+ }
emit_insn (gen_vec_cmp<mode><v_cmp_result> (mask, operands[3],
operands[4], operands[5]));
emit_insn (gen_vcond_mask_<v_cmp_mixed><v_cmp_result> (
@@ -2618,6 +2636,15 @@
rtx mask = gen_reg_rtx (<MODE>mode);
enum rtx_code code = GET_CODE (operands[3]);
+ /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
+ it as well as switch operands 1/2 in order to avoid the additional
+ NOT instruction. */
+ if (code == NE)
+ {
+ operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
+ operands[4], operands[5]);
+ std::swap (operands[1], operands[2]);
+ }
emit_insn (gen_vec_cmp<mode><mode> (mask, operands[3],
operands[4], operands[5]));
emit_insn (gen_vcond_mask_<mode><v_cmp_result> (operands[0], operands[1],
@@ -2638,6 +2665,15 @@
rtx mask = gen_reg_rtx (<V_cmp_result>mode);
enum rtx_code code = GET_CODE (operands[3]);
+ /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
+ it as well as switch operands 1/2 in order to avoid the additional
+ NOT instruction. */
+ if (code == NE)
+ {
+ operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
+ operands[4], operands[5]);
+ std::swap (operands[1], operands[2]);
+ }
emit_insn (gen_vec_cmp<v_cmp_mixed><v_cmp_mixed> (
mask, operands[3],
operands[4], operands[5]));
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vcond-ne-bit.c
b/gcc/testsuite/gcc.target/aarch64/simd/vcond-ne-bit.c
new file mode 100644
index 0000000..25170c2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/simd/vcond-ne-bit.c
@@ -0,0 +1,32 @@
+/* { dg-do run } */
+/* { dg-options "-save-temps" } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_condition } */
+#include <stdlib.h>
+
+int fn1 (int) __attribute__ ((noinline));
+
+int a[128];
+int fn1(int d) {
+ int b, c = 1;
+ for (b = 0; b < 128; b++)
+ if (a[b])
+ c = 0;
+ return c;
+}
+
+int
+main (void)
+{
+ int i;
+ for (i = 0; i < 128; i++)
+ a[i] = 0;
+ if (fn1(10) != 1)
+ abort ();
+ a[3] = 2;
+ a[24] = 1;
+ if (fn1(10) != 0)
+ abort ();
+ return 0;
+}
+/* { dg-final { scan-assembler-not "\[ \t\]not\[ \t\]" } } */