Hi all,

For this PR I want to teach combine to handle unrecognised patterns that
contain a sub-expression like (x + x) by rewriting it as (x << 1) and trying
to match the result.  This helps because some instruction sets, such as arm
and aarch64, can combine shifts with other arithmetic operations, or have
shifts in the RTL representation of more complex operations (like the
aarch64 UBFIZ instruction, which can be expressed as a zero_extend+ashift
pattern).
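
Schematically (modes and exact operands omitted, so take this as a sketch of
the rewrite rather than the exact rtxes combine sees), the new code turns a
sub-expression

  (plus (reg x) (reg x))

into

  (ashift (reg x) (const_int 1))

within an otherwise unchanged pattern, giving the backend's shift-based
patterns a chance to match.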

Due to a change in rtx costs for -mcpu=cortex-a53 in GCC 5 we no longer
expand an expression like x * 2 as x << 1 but rather as x + x, which hurts
combination opportunities due to this deficiency.

This patch addresses the issue in the recog_for_combine function in
combine.c in a similar way to the change_zero_ext trick.  That is, if
recog_for_combine fails to match a pattern, it replaces all instances of
x + x in the rtx with x << 1 and tries again.

This way I've been able to get combine to more aggressively generate the
arithmetic+shift forms of instructions for -mcpu=cortex-a53 on aarch64, as
well as instructions like ubfiz and sbfiz whose patterns contain
shift-by-immediate sub-expressions.
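
To illustrate (again schematically, with modes and exact operands omitted,
so this is an illustration rather than the exact rtl), for the foo testcase
below the pattern combine tries is roughly

  (set (reg r0) (and (plus (reg x) (reg x)) (const_int 65535)))

which nothing matches, but after the rewrite it becomes

  (set (reg r0) (and (ashift (reg x) (const_int 1)) (const_int 65535)))

which is the shift-by-immediate form that a ubfiz-style pattern can match.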

This patch shouldn't affect rtxes that already match, so it should have no 
fallout on other cases.

Bootstrapped and tested on arm, aarch64, x86_64.

For the testcase:
int
foo (int x)
{
  return (x * 2) & 65535;
}

int
bar (int x, int y)
{
  return (x * 2) | y;
}

with -O2 -mcpu=cortex-a53 for aarch64 we now generate:
foo:
        ubfiz   w0, w0, 1, 15
        ret

bar:
        orr     w0, w1, w0, lsl 1
        ret

whereas without this patch we generate:
foo:
        add     w0, w0, w0
        uxth    w0, w0
        ret

bar:
        add     w0, w0, w0
        orr     w0, w0, w1
        ret


PR 68651 is a code quality regression for GCC 5 and GCC 6 that was
introduced by updated rtx costs for -mcpu=cortex-a53 that affected
expansion.  The cost changes themselves were correct (to the extent that
rtx costs have any meaning) and I think this is a deficiency in combine
that should be fixed.

I wouldn't propose to backport this to GCC 5.

P.S. For the ubfiz testcase above to combine successfully, an aarch64 rtx
costs issue needs to be resolved first; I proposed a fix for it in
https://gcc.gnu.org/ml/gcc-patches/2015-12/msg00526.html.
Otherwise the backend wrongly rejects the combination on cost grounds.

Is this ok for trunk once the costs issue at 
https://gcc.gnu.org/ml/gcc-patches/2015-12/msg00526.html
gets resolved?

Thanks,
Kyrill

2015-12-14  Kyrylo Tkachov  <kyrylo.tkac...@arm.com>

    PR rtl-optimization/68651
    * combine.c (change_shift_by_one): New function.
    (change_rtx_with_func): Likewise.
    (recog_for_combine): Use the above to transform reg + reg
    sub-expressions into reg << 1 within non-recognisable patterns
    and try to match the result.

2015-12-14  Kyrylo Tkachov  <kyrylo.tkac...@arm.com>

    PR rtl-optimization/68651
    * gcc.target/aarch64/pr68651_1.c: New test.
diff --git a/gcc/combine.c b/gcc/combine.c
index c008d2a786ebeaa7560acbd60c7c2e8cdacdc9aa..9465e5927145e768f1a5fc43ce7c3621033d8aef 100644
--- a/gcc/combine.c
+++ b/gcc/combine.c
@@ -11063,6 +11063,60 @@ change_zero_ext (rtx *src)
   return changed;
 }
 
+/* Replace in SRC all sub-rtxes of the form x + x
+   with x << 1 to help recognition on targets with combined
+   shift operations.  Return true iff any such change was made.  */
+
+static bool
+change_shift_by_one (rtx *src)
+{
+  bool changed = false;
+
+  subrtx_ptr_iterator::array_type array;
+  FOR_EACH_SUBRTX_PTR (iter, array, src, NONCONST)
+    {
+      rtx x = **iter;
+      machine_mode mode = GET_MODE (x);
+
+      if (GET_CODE (x) == PLUS && GET_MODE_CLASS (mode) == MODE_INT
+	  && !side_effects_p (XEXP (x, 0))
+	  && rtx_equal_p (XEXP (x, 0), XEXP (x, 1)))
+	x = simplify_gen_binary (ASHIFT, mode, XEXP (x, 0), const1_rtx);
+      else
+	continue;
+
+      SUBST (**iter, x);
+      changed = true;
+    }
+
+  return changed;
+}
+
+/* Modify the sources of all sets in PAT using the function FUNC that takes
+   a pointer to the rtx to modify and returns true iff it made any
+   modifications.  Used by recog_for_combine to twiddle non-matching patterns
+   into something recognisable.  */
+
+static bool
+change_rtx_with_func (rtx pat, bool (*func) (rtx *))
+{
+  bool changed = false;
+
+  if (GET_CODE (pat) == SET)
+    changed = func (&SET_SRC (pat));
+  else if (GET_CODE (pat) == PARALLEL)
+    {
+      int i;
+      for (i = 0; i < XVECLEN (pat, 0); i++)
+	{
+	  rtx set = XVECEXP (pat, 0, i);
+	  if (GET_CODE (set) == SET)
+	    changed |= func (&SET_SRC (set));
+	}
+    }
+  return changed;
+}
+
 /* Like recog, but we receive the address of a pointer to a new pattern.
    We try to match the rtx that the pointer points to.
    If that fails, we may try to modify or replace the pattern,
@@ -11073,6 +11127,9 @@ change_zero_ext (rtx *src)
    to the equivalent AND and perhaps LSHIFTRT patterns, and try with that
    (and undo if that fails).
 
+   If that still doesn't match we change expressions of the form
+   (PLUS reg1 reg1) into (ASHIFT reg1 (const_int 1)).  Undo if that fails.
+
    PNOTES is a pointer to a location where any REG_UNUSED notes added for
    the CLOBBERs are placed.
 
@@ -11090,19 +11147,7 @@ recog_for_combine (rtx *pnewpat, rtx_insn *insn, rtx *pnotes)
   void *marker = get_undo_marker ();
   bool changed = false;
 
-  if (GET_CODE (pat) == SET)
-    changed = change_zero_ext (&SET_SRC (pat));
-  else if (GET_CODE (pat) == PARALLEL)
-    {
-      int i;
-      for (i = 0; i < XVECLEN (pat, 0); i++)
-	{
-	  rtx set = XVECEXP (pat, 0, i);
-	  if (GET_CODE (set) == SET)
-	    changed |= change_zero_ext (&SET_SRC (set));
-	}
-    }
-
+  changed = change_rtx_with_func (pat, change_zero_ext);
   if (changed)
     {
       insn_code_number = recog_for_combine_1 (pnewpat, insn, pnotes);
@@ -11111,6 +11156,20 @@ recog_for_combine (rtx *pnewpat, rtx_insn *insn, rtx *pnotes)
 	undo_to_marker (marker);
     }
 
+  if (insn_code_number < 0)
+    {
+      marker = get_undo_marker ();
+      changed = change_rtx_with_func (pat, change_shift_by_one);
+
+      if (changed)
+	{
+	  insn_code_number = recog_for_combine_1 (pnewpat, insn, pnotes);
+
+	  if (insn_code_number < 0)
+	    undo_to_marker (marker);
+	}
+
+    }
   return insn_code_number;
 }
 
diff --git a/gcc/testsuite/gcc.target/aarch64/pr68651_1.c b/gcc/testsuite/gcc.target/aarch64/pr68651_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..ef9456f538776e7db01ecf5473425aed9efd9de2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr68651_1.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mcpu=cortex-a53" } */
+
+int
+foo (int x)
+{
+  return (x * 2) & 65535;
+}
+/* { dg-final { scan-assembler "ubfiz\tw\[0-9\]*, w\[0-9\]*.*\n" } } */
+
+int
+bar (int x, int y)
+{
+  return (x * 2) | y;
+}
+/* { dg-final { scan-assembler "orr\tw\[0-9\]*, w\[0-9\]*, w\[0-9\]*, lsl 1.*\n" } } */
