As outlined in the BZ, given
(x & y) & C
reassociation will turn that into
(x & C) & y
which inhibits bit operations on various targets.
Associating as
(x & y) & C
is what we want.
My original patch did this for all binary operations. However,
reviewing the assembly code & dump files before/after it was pretty
clear that doing this for arithmetic was losing (mostly in that we were
missing CSE opportunities).
Restricting to logicals means there's far fewer triggering cases, but in
every one I looked at the resultant code was clearly better.
The testcase is a bit unusual in that it's in tree-ssa. It checks the
output of reassociation. However, on x86 and m68k it also checks the
resulting assembly code, which I believe is unique in the tree-ssa
directory.
Bootstrapped and regression tested on x86_64-linux-gnu. The test has
been verified on x86_64-linux-gnu, i686-linux-gnu and m68k-linux-gnu.
OK for the trunk?
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index c7626ff..f5ca85e 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,9 @@
+2015-12-20 Jeff Law <l...@redhat.com>
+
+ PR tree-optimization/64910
+ * tree-ssa-reassoc.c (swap_ops_for_binary_stmt): Make sure constant
+ is last for binary bit operations.
+
2015-12-21 Pierre-Marie de Rodat <dero...@adacore.com>
* dwarf2out.c (add_data_member_location_attribute): Do not
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index bb2ed22..e2f55f3 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2015-12-20 Jeff Law <l...@redhat.com>
+
+ PR tree-optimization/64910
+ * gcc.dg/tree-ssa/pr64910-2.c: New test.
+
2015-12-21 Claudiu Zissulescu <claz...@synopsys.com>
* gcc.target/arc/builtin_general.c: New test.
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr64910-2.c
b/gcc/testsuite/gcc.dg/tree-ssa/pr64910-2.c
new file mode 100644
index 0000000..2e3d679
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr64910-2.c
@@ -0,0 +1,85 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-reassoc1" } */
+
+/* We want to make sure that we reassociate in a way that has the
+ constant last. With the constant last, it's more likely to result
+ in a bitfield test on targets with such capabilities. */
+
+extern void boo ();
+
+int b2b_uc (unsigned char u, unsigned char w)
+{
+ if ((u & w) & 0x20)
+ boo ();
+}
+
+int b2b_us (unsigned short u, unsigned short w)
+{
+ if ((u & w) & 0x20)
+ boo ();
+}
+
+int b2b_ui (unsigned int u, unsigned int w)
+{
+ if ((u & w) & 0x20)
+ boo ();
+}
+int b2b_ul (unsigned long u, unsigned long w)
+{
+ if ((u & w) & 0x20)
+ boo ();
+}
+int b2b_ull (unsigned long long u, unsigned long long w)
+{
+ if ((u & w) & 0x20)
+ boo ();
+}
+
+int b2b_sc (signed char u, signed char w)
+{
+ if ((u & w) & 0x20)
+ boo ();
+}
+
+int b2b_ss (signed short u, signed short w)
+{
+ if ((u & w) & 0x20)
+ boo ();
+}
+
+int b2b_si (signed int u, signed int w)
+{
+ if ((u & w) & 0x20)
+ boo ();
+}
+int b2b_sl (signed long u, signed long w)
+{
+ if ((u & w) & 0x20)
+ boo ();
+}
+int b2b_sll (signed long long u, signed long long w)
+{
+ if ((u & w) & 0x20)
+ boo ();
+}
+
+/* The AND of U & W should go into a temporary, which is then ANDed
+ with the constant.
+
+ First verify that we have the right number of ANDs between U and W. */
+/* { dg-final { scan-tree-dump-times "\[uw\]_\[0-9\]+.D. \&
\[uw\]_\[0-9\]+.D.;" 10 "reassoc1"} } */
+
+/* Then verify that we have the right number of ANDS between a temporary
+ and the constant. */
+/* { dg-final { scan-tree-dump-times "_\[0-9]+ \& 32;" 10 "reassoc1"} } */
+
+/* Each function has one AND. It will have either a second AND or TEST. So
+ we can count the number of AND and TEST instructions. They must be 2X
+ the number of test functions in this file. */
+/* { dg-final { scan-assembler-times "and|test" 20 { target { i?86-*-*
x86_64-*-*} } } } */
+
+/* Similarly on the m68k. The code for the long long tests is suboptimal,
 which we catch via the second pattern and xfail. */
+/* { dg-final { scan-assembler-times "and|btst" 20 { target { m68k-*-* } } } }
*/
+/* { dg-final { scan-assembler-not "or" { target { m68k-*-* } xfail { *-*-* }
} } } */
+
diff --git a/gcc/tree-ssa-reassoc.c b/gcc/tree-ssa-reassoc.c
index e54700e..1dcfce3 100644
--- a/gcc/tree-ssa-reassoc.c
+++ b/gcc/tree-ssa-reassoc.c
@@ -3449,6 +3449,26 @@ swap_ops_for_binary_stmt (vec<operand_entry *> ops,
oe1->op = temp.op;
oe1->rank = temp.rank;
}
+ /* For X OP Y OP C, associate as (X OP Y) OP C, but only for
+ logicals, which encourages bit operations. Experimentation
+ has shown this generally isn't a win for arithmetic. */
+ else if (stmt)
+ {
+ enum tree_code opcode = gimple_assign_rhs_code (stmt);
+ if ((opcode == BIT_AND_EXPR
+ || opcode == BIT_IOR_EXPR
+ || opcode == BIT_XOR_EXPR)
+ && TREE_CODE (oe1->op) != INTEGER_CST
+ && TREE_CODE (oe2->op) != INTEGER_CST
+ && TREE_CODE (oe3->op) == INTEGER_CST)
+ {
+ operand_entry temp = *oe3;
+ oe3->op = oe1->op;
+ oe3->rank = oe1->rank;
+ oe1->op = temp.op;
+ oe1->rank = temp.rank;
+ }
+ }
}
/* If definition of RHS1 or RHS2 dominates STMT, return the later of those