From: Philipp Tomsich <p...@gnu.org>

The function

  long f(long a)
  {
    return (a & 0xFFFFFFFFull) << 3;
  }

is folded into

  _1 = a_2(D) << 3;
  _3 = _1 & 34359738360;

whereas the construction

  return (a & 0xFFFFFFFFull) * 8;

results in

  _1 = a_2(D) & 4294967295;
  _3 = _1 * 8;
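(Both GIMPLE sequences above can be observed in the -fdump-tree-optimized
dump; assuming a recent GCC, something like

  $ gcc -O2 -S -fdump-tree-optimized f.c

where f.c holds the respective variant of f().  The SSA names will of
course vary.)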
This leads to suboptimal code generation for RISC-V (-march=rv64g), as
the shifted constant needs to be expanded into 3 RTX, in addition to the
2 RTX for the LSHIFT_EXPR and the BIT_AND_EXPR (one each); the resulting
sequence of 5 RTX overwhelms the combine pass, which does not consider
sequences of that length:

	li	a5,1		# tmp78,	# 23	[c=4 l=4]  *movdi_64bit/1
	slli	a5,a5,35	#, tmp79, tmp78	# 24	[c=4 l=4]  ashldi3
	addi	a5,a5,-8	#, tmp77, tmp79	# 9	[c=4 l=4]  adddi3/1
	slli	a0,a0,3		#, tmp76, tmp80	# 6	[c=4 l=4]  ashldi3
	and	a0,a0,a5	# tmp77,, tmp76	# 15	[c=4 l=4]  anddi3/0
	ret			# 28	[c=0 l=4]  simple_return

instead of:

	slli	a0,a0,32	#, tmp76, tmp79	# 26	[c=4 l=4]  ashldi3
	srli	a0,a0,29	#,, tmp76	# 27	[c=4 l=4]  lshrdi3
	ret			# 24	[c=0 l=4]  simple_return

We address this by adding a simplification that rewrites

  (a << s) & M,  where ((M >> s) << s) == M

into

  (a & M_unshifted) << s,  where M_unshifted := (M >> s)

which undistributes the LSHIFT.  The new rule is restricted to GIMPLE
and to masks where M_unshifted + 1 is a power of two (the exact_log2
check below); the existing rule that distributes (X & C2) << C1 into
(X << C1) & (C2 << C1) is disabled for such constants, so that the two
patterns cannot undo each other.

Signed-off-by: Philipp Tomsich <p...@gnu.org>
---
 gcc/match.pd                            | 11 +++++++++--
 gcc/testsuite/gcc.target/riscv/zextws.c | 18 ++++++++++++++++++
 2 files changed, 27 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/zextws.c

diff --git a/gcc/match.pd b/gcc/match.pd
index 349eab6..6bb9535 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3079,6 +3079,12 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 	      }
 	  }
       }
+     (if (GIMPLE && (((mask >> shiftc) << shiftc) == mask)
+	  && (exact_log2((mask >> shiftc) + 1) >= 0)
+	  && (shift == LSHIFT_EXPR))
+      (with
+       { tree newmaskt = build_int_cst_type(TREE_TYPE (@2), mask >> shiftc); }
+       (shift (convert (bit_and:shift_type (convert @0) { newmaskt; })) @1))
      /* ((X << 16) & 0xff00) is (X, 0).  */
      (if ((mask & zerobits) == mask)
       { build_int_cst (type, 0); }
@@ -3100,7 +3106,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 	    (if (!tree_int_cst_equal (newmaskt, @2))
 	     (if (shift_type != TREE_TYPE (@3))
 	      (bit_and (convert (shift:shift_type (convert @3) @1)) { newmaskt; })
-	      (bit_and @4 { newmaskt; })))))))))))))
+	      (bit_and @4 { newmaskt; }))))))))))))))
 
 /* Fold (X {&,^,|} C2) << C1 into (X << C1) {&,^,|} (C2 << C1)
    (X {&,^,|} C2) >> C1 into (X >> C1) & (C2 >> C1).  */
@@ -3108,7 +3114,8 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 (for bit_op (bit_and bit_xor bit_ior)
  (simplify
   (shift (convert?:s (bit_op:s @0 INTEGER_CST@2)) INTEGER_CST@1)
-  (if (tree_nop_conversion_p (type, TREE_TYPE (@0)))
+  (if (tree_nop_conversion_p (type, TREE_TYPE (@0))
+       && wi::exact_log2(wi::to_wide(@2) + 1) < 0)
    (with { tree mask = int_const_binop (shift, fold_convert (type, @2), @1); }
     (bit_op (shift (convert @0) @1) { mask; })))))
diff --git a/gcc/testsuite/gcc.target/riscv/zextws.c b/gcc/testsuite/gcc.target/riscv/zextws.c
new file mode 100644
index 0000000..8ac93f6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/zextws.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64g -mabi=lp64 -O2" } */
+
+/* Test for
+     (a << s) & M, where ((M >> s) << s) == M
+   being undistributed into
+     (a & M_unshifted) << s, where M_unshifted := (M >> s)
+   to produce the sequence (or similar)
+     slli	a0,a0,32
+     srli	a0,a0,29
+*/
+long
+zextws_mask (long i)
+{
+  return (i & 0xffffffffULL) << 3;
+}
+/* { dg-final { scan-assembler "slli" } } */
+/* { dg-final { scan-assembler "srli" } } */
-- 
1.8.3.1
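P.S.: For anyone wanting to cross-check the transform outside of GCC,
here is a standalone sketch (not part of the patch; the file and
variable names are illustrative only) that exercises the side
conditions and the equivalence on the constants from the example above:

  /* check_undistribute.c -- sanity check for the undistribution
     (a << s) & M  ==>  (a & (M >> s)) << s.  */
  #include <assert.h>
  #include <stdint.h>
  #include <stdio.h>

  int
  main (void)
  {
    const uint64_t M = 34359738360ull;        /* 0xFFFFFFFF << 3 */
    const int s = 3;
    const uint64_t m_unshifted = M >> s;      /* 0xFFFFFFFF */

    /* The rule may only fire when the unshift loses no mask bits...  */
    assert (((M >> s) << s) == M);
    /* ...and when the unshifted mask is a contiguous run of low bits,
       i.e. mask + 1 is a power of two (the property the exact_log2
       check in the pattern tests).  */
    assert ((m_unshifted & (m_unshifted + 1)) == 0);

    /* The undistributed form is equivalent for any input value.  */
    for (uint64_t a = 0; a < (1u << 16); a++)
      assert (((a << s) & M) == ((a & m_unshifted) << s));

    puts ("ok");
    return 0;
  }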