Hi all,
r217118 added an optimization that combines an ashiftrt with an lshiftrt.
The same optimization can, at the very least, also be applied to lshiftrt +
lshiftrt under the same constraints: both operations are done in scalar
modes, the second operation operates on a subreg of the first one, and the
shift amount of the first operation equals the difference between the
bitsizes of the two modes (size (M2) - size (M1) in the comment below).
On AArch64, this reduces:
  umull x1, w0, w1
  lsr x1, x1, 32
  lsr w1, w1, 5
to:
  umull x1, w0, w1
  lsr x1, x1, 37
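
For reference, a minimal standalone check (not part of the patch) of the
identity this combination relies on: taking the low 32 bits of an unsigned
64-bit value shifted right by 32 and then shifting by another 5 gives the
same result as truncating the value shifted right by 37, since the latter
has at most 27 significant bits.

#include <stdint.h>
#include <assert.h>

int
main (void)
{
  /* Arbitrary test values.  */
  uint64_t tests[] = { 0, 1, 0x123456789abcdef0ULL, 0xffffffffffffffffULL,
                       (uint64_t) 0xaaaaaaaa * 0xa0a0a0a1ULL };
  for (unsigned int i = 0; i < sizeof (tests) / sizeof (tests[0]); i++)
    {
      uint64_t x = tests[i];
      /* What the original sequence computes: lsr by 32, then lsr by 5 on
         the 32-bit lowpart.  */
      uint32_t two_shifts = (uint32_t) (x >> 32) >> 5;
      /* What the combined sequence computes: a single lsr by 37, then the
         32-bit lowpart.  */
      uint32_t one_shift = (uint32_t) (x >> 37);
      assert (two_shifts == one_shift);
    }
  return 0;
}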
Bootstrapped on aarch64-none-linux-gnu and x86_64-linux, and reg-tested on
aarch64-none-linux-gnu with no regressions.
OK for trunk?
Thanks,
Tamar
gcc/
2017-04-27 Tamar Christina <tamar.christ...@arm.com>
* simplify-rtx.c (simplify_binary_operation_1): Add LSHIFTRT case.
gcc/testsuite/
2017-04-27 Tamar Christina <tamar.christ...@arm.com>
* gcc.dg/lsr-div1.c: New testcase.
diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c
index 640ccb7cb95933a6991bf1599099f7aed455daec..feaceff06d6267b372f40fcd263e2ae67bbd4c74 100644
--- a/gcc/simplify-rtx.c
+++ b/gcc/simplify-rtx.c
@@ -3343,19 +3343,21 @@ simplify_binary_operation_1 (enum rtx_code code, machine_mode mode,
&& UINTVAL (trueop0) == GET_MODE_MASK (mode)
&& ! side_effects_p (op1))
return op0;
+
+ canonicalize_shift:
/* Given:
scalar modes M1, M2
scalar constants c1, c2
size (M2) > size (M1)
c1 == size (M2) - size (M1)
optimize:
- (ashiftrt:M1 (subreg:M1 (lshiftrt:M2 (reg:M2) (const_int <c1>))
+ ([a|l]shiftrt:M1 (subreg:M1 (lshiftrt:M2 (reg:M2) (const_int <c1>))
<low_part>)
(const_int <c2>))
to:
- (subreg:M1 (ashiftrt:M2 (reg:M2) (const_int <c1 + c2>))
+ (subreg:M1 ([a|l]shiftrt:M2 (reg:M2) (const_int <c1 + c2>))
<low_part>). */
- if (code == ASHIFTRT
+ if ((code == ASHIFTRT || code == LSHIFTRT)
&& !VECTOR_MODE_P (mode)
&& SUBREG_P (op0)
&& CONST_INT_P (op1)
@@ -3372,13 +3374,13 @@ simplify_binary_operation_1 (enum rtx_code code, machine_mode mode,
rtx tmp = GEN_INT (INTVAL (XEXP (SUBREG_REG (op0), 1))
+ INTVAL (op1));
machine_mode inner_mode = GET_MODE (SUBREG_REG (op0));
- tmp = simplify_gen_binary (ASHIFTRT,
+ tmp = simplify_gen_binary (code,
GET_MODE (SUBREG_REG (op0)),
XEXP (SUBREG_REG (op0), 0),
tmp);
return lowpart_subreg (mode, tmp, inner_mode);
}
- canonicalize_shift:
+
if (SHIFT_COUNT_TRUNCATED && CONST_INT_P (op1))
{
val = INTVAL (op1) & (GET_MODE_PRECISION (mode) - 1);
diff --git a/gcc/testsuite/gcc.dg/lsr-div1.c b/gcc/testsuite/gcc.dg/lsr-div1.c
new file mode 100644
index 0000000000000000000000000000000000000000..962054d34d953b63c9736134b9ad147791a491d3
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/lsr-div1.c
@@ -0,0 +1,57 @@
+/* Test division by const int generates only one shift. */
+/* { dg-do run } */
+/* { dg-options "-O2 -fdump-rtl-combine-all" } */
+/* { dg-options "-O2 -fdump-rtl-combine-all -mtune=cortex-a53" { target aarch64*-*-* } } */
+/* { dg-require-effective-target int32plus } */
+
+extern void abort (void);
+
+#define NOINLINE __attribute__((noinline))
+
+static NOINLINE int
+f1 (unsigned int n)
+{
+ return n % 0x33;
+}
+
+static NOINLINE int
+f2 (unsigned int n)
+{
+ return n % 0x12;
+}
+
+int
+main ()
+{
+ int a = 0xaaaaaaaa;
+ int b = 0x55555555;
+ int c;
+ c = f1 (a);
+ if (c != 0x11)
+ abort ();
+ c = f1 (b);
+ if (c != 0x22)
+ abort ();
+ c = f2 (a);
+ if (c != 0xE)
+ abort ();
+ c = f2 (b);
+ if (c != 0x7)
+ abort ();
+ return 0;
+}
+
+/* When replacing integer division by a constant, GCC is expected to generate
+   UMULL and a right shift ((x)SHIFTRT).  This test checks that for division
+   by the constant 0x33, GCC generates a single LSHIFTRT by 37 instead of
+   two shifts - LSHIFTRT by 32 and LSHIFTRT by 5.  */
+
+/* { dg-final { scan-rtl-dump "\\(set \\(subreg:DI \\(reg:SI" "combine" { target aarch64*-*-* } } } */
+/* { dg-final { scan-rtl-dump "\\(lshiftrt:DI \\(reg:DI" "combine" { target aarch64*-*-* } } } */
+/* { dg-final { scan-rtl-dump "\\(const_int 37 " "combine" { target aarch64*-*-* } } } */
+
+/* Similarly, for division by the constant 0x12, GCC generates a single
+   LSHIFTRT by 34 instead of two shifts - LSHIFTRT by 32 and LSHIFTRT by 2.  */
+
+/* { dg-final { scan-rtl-dump "\\(const_int 34 " "combine" { target aarch64*-*-* } } } */
+
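
For completeness, the comment in the new test refers to the usual
multiply-high replacement of unsigned division by a constant. Below is a
minimal sketch (not part of the patch) of the lowering expected for
n % 0x33, assuming a multiplier of ceil (2^37 / 51) = 0xa0a0a0a1; the
constant is my own computation rather than something taken from GCC, and
the only point is that the quotient needs a single right shift by 37 after
the widening multiply, i.e. the umull/lsr pair shown above.

#include <stdint.h>
#include <assert.h>

/* Hypothetical sketch of n % 0x33 via multiply-high.  */
static uint32_t
mod_0x33 (uint32_t n)
{
  uint64_t prod = (uint64_t) n * 0xa0a0a0a1u;  /* umull x1, w0, w1 */
  uint32_t quot = (uint32_t) (prod >> 37);     /* lsr x1, x1, 37 */
  return n - quot * 0x33;                      /* remainder */
}

int
main (void)
{
  /* The same values and results the new test checks for f1.  */
  assert (mod_0x33 (0xaaaaaaaa) == 0x11);
  assert (mod_0x33 (0x55555555) == 0x22);
  return 0;
}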