Hi,
This is a follow-up patch to the previous vcond patches.  In those
patches we relied on the combiner to simplify "X = !Y; Z = X ? A : B"
into "Z = Y ? B : A".  That works for some cases, but not all of them,
for example the case in PR69848.  The cause could lie in the combiner,
but more likely in the bsl patterns, which are too complicated for the
combiner to handle.  Investigating all cases pattern by pattern would
be tedious, so this patch modifies the vcond patterns to explicitly
invert the comparison code (and swap the data operands) in order to
avoid the additional NOT instruction; a short sketch of the underlying
identity follows below.  Note that unordered floating-point comparisons
are not handled, because doing so would complicate the code, and NE is
the most common case anyway.  On top of the vcond patches, this patch
further reduces the assembly code generated for PR69848.
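
For illustration only (this sketch and its function names are mine, not
part of the patch), the scalar identity the expanders rely on: a select
on NE is equivalent to a select on EQ with the two value operands
swapped, so the EQ mask can drive the bsl directly and no NOT is needed.

#include <assert.h>

/* "Z = (X != Y) ? A : B" is equivalent to "Z = (X == Y) ? B : A",
   so an EQ mask with swapped select operands replaces EQ + NOT.  */
static int
select_ne (int x, int y, int a, int b)
{
  return (x != y) ? a : b;
}

static int
select_eq_swapped (int x, int y, int a, int b)
{
  return (x == y) ? b : a;
}

int
main (void)
{
  int vals[3][2] = { {1, 2}, {3, 3}, {-5, 0} };
  for (int i = 0; i < 3; i++)
    assert (select_ne (vals[i][0], vals[i][1], 10, 20)
	    == select_eq_swapped (vals[i][0], vals[i][1], 10, 20));
  return 0;
}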
Bootstrapped and tested on AArch64.  Is it OK?

Thanks,
bin

gcc/ChangeLog
2016-08-03  Bin Cheng  <bin.ch...@arm.com>

        PR tree-optimization/69848
        * config/aarch64/aarch64-simd.md (vcond<mode><mode>): Invert NE
        and switch operands to avoid the additional NOT instruction.
        (vcond<v_cmp_mixed><mode>): Ditto.
        (vcondu<mode><mode>, vcondu<mode><v_cmp_mixed>): Ditto.

gcc/testsuite/ChangeLog
2016-08-03  Bin Cheng  <bin.ch...@arm.com>

        PR tree-optimization/69848
        * gcc.target/aarch64/simd/vcond-ne-bit.c: New test.
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index dca079f..3fa88be 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -2575,6 +2575,15 @@
   rtx mask = gen_reg_rtx (<V_cmp_result>mode);
   enum rtx_code code = GET_CODE (operands[3]);
 
+  /* NE is handled as !EQ in vec_cmp patterns, so we explicitly invert
+     it and swap operands 1/2 in order to avoid the additional NOT
+     instruction.  */
+  if (code == NE)
+    {
+      operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
+                                   operands[4], operands[5]);
+      std::swap (operands[1], operands[2]);
+    }
   emit_insn (gen_vec_cmp<mode><v_cmp_result> (mask, operands[3],
                                              operands[4], operands[5]));
   emit_insn (gen_vcond_mask_<mode><v_cmp_result> (operands[0], operands[1],
@@ -2596,6 +2605,15 @@
   rtx mask = gen_reg_rtx (<V_cmp_result>mode);
   enum rtx_code code = GET_CODE (operands[3]);
 
+  /* NE is handled as !EQ in vec_cmp patterns, so we explicitly invert
+     it and swap operands 1/2 in order to avoid the additional NOT
+     instruction.  */
+  if (code == NE)
+    {
+      operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
+                                   operands[4], operands[5]);
+      std::swap (operands[1], operands[2]);
+    }
   emit_insn (gen_vec_cmp<mode><v_cmp_result> (mask, operands[3],
                                              operands[4], operands[5]));
   emit_insn (gen_vcond_mask_<v_cmp_mixed><v_cmp_result> (
@@ -2618,6 +2636,15 @@
   rtx mask = gen_reg_rtx (<MODE>mode);
   enum rtx_code code = GET_CODE (operands[3]);
 
+  /* NE is handled as !EQ in vec_cmp patterns, so we explicitly invert
+     it and swap operands 1/2 in order to avoid the additional NOT
+     instruction.  */
+  if (code == NE)
+    {
+      operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
+                                   operands[4], operands[5]);
+      std::swap (operands[1], operands[2]);
+    }
   emit_insn (gen_vec_cmp<mode><mode> (mask, operands[3],
                                      operands[4], operands[5]));
   emit_insn (gen_vcond_mask_<mode><v_cmp_result> (operands[0], operands[1],
@@ -2638,6 +2665,15 @@
   rtx mask = gen_reg_rtx (<V_cmp_result>mode);
   enum rtx_code code = GET_CODE (operands[3]);
 
+  /* NE is handled as !EQ in vec_cmp patterns, so we explicitly invert
+     it and swap operands 1/2 in order to avoid the additional NOT
+     instruction.  */
+  if (code == NE)
+    {
+      operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
+                                   operands[4], operands[5]);
+      std::swap (operands[1], operands[2]);
+    }
   emit_insn (gen_vec_cmp<v_cmp_mixed><v_cmp_mixed> (
                                                  mask, operands[3],
                                                  operands[4], operands[5]));
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vcond-ne-bit.c b/gcc/testsuite/gcc.target/aarch64/simd/vcond-ne-bit.c
new file mode 100644
index 0000000..25170c2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/simd/vcond-ne-bit.c
@@ -0,0 +1,32 @@
+/* { dg-do run } */
+/* { dg-options "-save-temps" } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_condition } */
+#include <stdlib.h>
+
+int fn1 (int) __attribute__ ((noinline));
+
+int a[128];
+int fn1(int d) {
+  int b, c = 1;
+  for (b = 0; b < 128; b++)
+    if (a[b])
+      c = 0;
+  return c;
+}
+
+int
+main (void)
+{
+  int i;
+  for (i = 0; i < 128; i++)
+    a[i] = 0;
+  if (fn1(10) != 1)
+    abort ();
+  a[3] = 2;
+  a[24] = 1;
+  if (fn1(10) != 0)
+    abort ();
+  return 0;
+}
+/* { dg-final { scan-assembler-not "\[ \t\]not\[ \t\]" } } */
