From: Philipp Tomsich <[email protected]>
The function
long f(long a)
{
return (a & 0xFFFFFFFFull) << 3;
}
is folded into
_1 = a_2(D) << 3;
_3 = _1 & 34359738360;
whereas the construction
return (a & 0xFFFFFFFFull) * 8;
results in
_1 = a_2(D) & 4294967295;
_3 = _1 * 8;
This leads to suboptimal code-generation for RISC-V (march=rv64g), as
the shifted constant needs to be expanded into 3 RTX and 2 RTX (one
each for the LSHIFT_EXPR and the BIT_AND_EXPR) which will overwhelm
the combine pass (a sequence of 5 RTX are not considered):
li a5,1 # tmp78, # 23 [c=4 l=4]
*movdi_64bit/1
slli a5,a5,35 #, tmp79, tmp78 # 24 [c=4 l=4] ashldi3
addi a5,a5,-8 #, tmp77, tmp79 # 9 [c=4 l=4] adddi3/1
slli a0,a0,3 #, tmp76, tmp80 # 6 [c=4 l=4] ashldi3
and a0,a0,a5 # tmp77,, tmp76 # 15 [c=4 l=4] anddi3/0
ret # 28 [c=0 l=4] simple_return
instead of:
slli a0,a0,32 #, tmp76, tmp79 # 26 [c=4 l=4] ashldi3
srli a0,a0,29 #,, tmp76 # 27 [c=4 l=4] lshrdi3
ret # 24 [c=0 l=4] simple_return
We address this by adding a simplification for
(a << s) & M, where ((M >> s) << s) == M
to
(a & M_unshifted) << s, where M_unshifted := (M >> s)
which undistributes the LSHIFT.
Signed-off-by: Philipp Tomsich <[email protected]>
---
gcc/match.pd | 11 +++++++++--
gcc/testsuite/gcc.target/riscv/zextws.c | 18 ++++++++++++++++++
2 files changed, 27 insertions(+), 2 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/zextws.c
diff --git a/gcc/match.pd b/gcc/match.pd
index 349eab6..6bb9535 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3079,6 +3079,12 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
}
}
}
+ (if (GIMPLE && (((mask >> shiftc) << shiftc) == mask)
+ && (exact_log2((mask >> shiftc) + 1) >= 0)
+ && (shift == LSHIFT_EXPR))
+ (with
+ { tree newmaskt = build_int_cst_type(TREE_TYPE (@2), mask >> shiftc); }
+ (shift (convert (bit_and:shift_type (convert @0) { newmaskt; })) @1))
/* ((X << 16) & 0xff00) is (X, 0). */
(if ((mask & zerobits) == mask)
{ build_int_cst (type, 0); }
@@ -3100,7 +3106,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
(if (!tree_int_cst_equal (newmaskt, @2))
(if (shift_type != TREE_TYPE (@3))
(bit_and (convert (shift:shift_type (convert @3) @1)) { newmaskt; })
- (bit_and @4 { newmaskt; })))))))))))))
+ (bit_and @4 { newmaskt; }))))))))))))))
/* Fold (X {&,^,|} C2) << C1 into (X << C1) {&,^,|} (C2 << C1)
(X {&,^,|} C2) >> C1 into (X >> C1) & (C2 >> C1). */
@@ -3108,7 +3114,8 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
(for bit_op (bit_and bit_xor bit_ior)
(simplify
(shift (convert?:s (bit_op:s @0 INTEGER_CST@2)) INTEGER_CST@1)
- (if (tree_nop_conversion_p (type, TREE_TYPE (@0)))
+ (if (tree_nop_conversion_p (type, TREE_TYPE (@0))
+ && !wi::exact_log2(wi::to_wide(@2) + 1))
(with { tree mask = int_const_binop (shift, fold_convert (type, @2), @1); }
(bit_op (shift (convert @0) @1) { mask; }))))))
diff --git a/gcc/testsuite/gcc.target/riscv/zextws.c b/gcc/testsuite/gcc.target/riscv/zextws.c
new file mode 100644
index 0000000..8ac93f6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/zextws.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64g -mabi=lp64 -O2" } */
+
+/* Test for
+ (a << s) & M, where ((M >> s) << s) == M
+ being undistributed into
+ (a & M_unshifted) << s, where M_unshifted := (M >> s)
+ to produce the sequence (or similar)
+ slli a0,a0,32
+ srli a0,a0,29
+*/
+long
+zextws_mask (long i)
+{
+ return (i & 0xffffffffULL) << 3;
+}
+/* { dg-final { scan-assembler "slli" } } */
+/* { dg-final { scan-assembler "srli" } } */
--
1.8.3.1