Hi! Multiplication/division/modulo/float operands are handled by libgcc calls and so need to be passed as an array of limbs with a precision argument, using handle_operand_addr. That code can't deal with more than one cast, so the following patch avoids merging those cases. .MUL_OVERFLOW calls use the same code, but we already don't try to merge the operands in that case.
Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? 2023-12-21 Jakub Jelinek <ja...@redhat.com> PR tree-optimization/112941 * gimple-lower-bitint.cc (gimple_lower_bitint): Disallow merging a cast with multiplication, division or conversion to floating point if rhs1 of the cast is result of another single use cast in the same bb. * gcc.dg/bitint-56.c: New test. * gcc.dg/bitint-57.c: New test. --- gcc/gimple-lower-bitint.cc.jj 2023-12-20 11:32:45.898425928 +0100 +++ gcc/gimple-lower-bitint.cc 2023-12-20 12:47:00.168727583 +0100 @@ -6060,6 +6060,26 @@ gimple_lower_bitint (void) || (bitint_precision_kind (TREE_TYPE (rhs1)) < bitint_prec_large)) continue; + if (is_gimple_assign (use_stmt)) + switch (gimple_assign_rhs_code (use_stmt)) + { + case MULT_EXPR: + case TRUNC_DIV_EXPR: + case TRUNC_MOD_EXPR: + case FLOAT_EXPR: + /* Uses which use handle_operand_addr can't + deal with nested casts. */ + if (TREE_CODE (rhs1) == SSA_NAME + && gimple_assign_cast_p + (SSA_NAME_DEF_STMT (rhs1)) + && has_single_use (rhs1) + && (gimple_bb (SSA_NAME_DEF_STMT (rhs1)) + == gimple_bb (SSA_NAME_DEF_STMT (s)))) + goto force_name; + break; + default: + break; + } if ((TYPE_PRECISION (TREE_TYPE (rhs1)) >= TYPE_PRECISION (TREE_TYPE (s))) && mergeable_op (use_stmt)) @@ -6154,6 +6174,7 @@ gimple_lower_bitint (void) && (!SSA_NAME_VAR (s) || VAR_P (SSA_NAME_VAR (s)))) continue; + force_name: if (!large_huge.m_names) large_huge.m_names = BITMAP_ALLOC (NULL); bitmap_set_bit (large_huge.m_names, SSA_NAME_VERSION (s)); --- gcc/testsuite/gcc.dg/bitint-56.c.jj 2023-12-20 12:41:28.208398219 +0100 +++ gcc/testsuite/gcc.dg/bitint-56.c 2023-12-20 12:38:08.548204489 +0100 @@ -0,0 +1,129 @@ +/* PR tree-optimization/112941 */ +/* { dg-do compile { target bitint } } */ +/* { dg-options "-std=c23 -O2" } */ + +#if __BITINT_MAXWIDTH__ >= 4096 +void +f1 (_BitInt(4096) *p, int r, _BitInt(115) s, _BitInt(128) t, _BitInt(231) u) +{ + p[0] *= (unsigned _BitInt(2048)) r; + p[1] *= (unsigned 
_BitInt(2048)) s; + p[2] *= (unsigned _BitInt(2048)) t; + p[3] *= (unsigned _BitInt(2048)) u; +} + +void +f2 (_BitInt(4094) *p, int r, _BitInt(115) s, _BitInt(128) t, _BitInt(231) u) +{ + p[0] /= (unsigned _BitInt(2048)) r; + p[1] /= (unsigned _BitInt(2048)) s; + p[2] /= (unsigned _BitInt(2048)) t; + p[3] /= (unsigned _BitInt(2048)) u; +} + +void +f3 (_BitInt(4096) *p, int r, _BitInt(115) s, _BitInt(128) t, _BitInt(231) u) +{ + p[0] *= (unsigned _BitInt(2110)) r; + p[1] *= (unsigned _BitInt(2110)) s; + p[2] *= (unsigned _BitInt(2110)) t; + p[3] *= (unsigned _BitInt(2110)) u; +} + +void +f4 (_BitInt(4094) *p, int r, _BitInt(115) s, _BitInt(128) t, _BitInt(231) u) +{ + p[0] /= (unsigned _BitInt(2110)) r; + p[1] /= (unsigned _BitInt(2110)) s; + p[2] /= (unsigned _BitInt(2110)) t; + p[3] /= (unsigned _BitInt(2110)) u; +} + +void +f5 (unsigned _BitInt(4096) *p, int r, _BitInt(115) s, _BitInt(128) t, _BitInt(231) u) +{ + p[0] *= (unsigned _BitInt(2048)) r; + p[1] *= (unsigned _BitInt(2048)) s; + p[2] *= (unsigned _BitInt(2048)) t; + p[3] *= (unsigned _BitInt(2048)) u; +} + +void +f6 (unsigned _BitInt(4094) *p, int r, _BitInt(115) s, _BitInt(128) t, _BitInt(231) u) +{ + p[0] /= (unsigned _BitInt(2048)) r; + p[1] /= (unsigned _BitInt(2048)) s; + p[2] /= (unsigned _BitInt(2048)) t; + p[3] /= (unsigned _BitInt(2048)) u; +} + +void +f7 (unsigned _BitInt(4096) *p, int r, _BitInt(115) s, _BitInt(128) t, _BitInt(231) u) +{ + p[0] *= (unsigned _BitInt(2110)) r; + p[1] *= (unsigned _BitInt(2110)) s; + p[2] *= (unsigned _BitInt(2110)) t; + p[3] *= (unsigned _BitInt(2110)) u; +} + +void +f8 (unsigned _BitInt(4094) *p, int r, _BitInt(115) s, _BitInt(128) t, _BitInt(231) u) +{ + p[0] /= (unsigned _BitInt(2110)) r; + p[1] /= (unsigned _BitInt(2110)) s; + p[2] /= (unsigned _BitInt(2110)) t; + p[3] /= (unsigned _BitInt(2110)) u; +} + +#if __SIZEOF_INT128__ +void +f9 (_BitInt(4096) *p, __int128 r) +{ + p[0] *= (unsigned _BitInt(2048)) r; +} + +void +f10 (_BitInt(4094) *p, __int128 r) +{ + 
p[0] /= (unsigned _BitInt(2048)) r; +} + +void +f11 (_BitInt(4096) *p, __int128 r) +{ + p[0] *= (unsigned _BitInt(2110)) r; +} + +void +f12 (_BitInt(4094) *p, __int128 r) +{ + p[0] /= (unsigned _BitInt(2110)) r; +} + +void +f13 (unsigned _BitInt(4096) *p, __int128 r) +{ + p[0] *= (unsigned _BitInt(2048)) r; +} + +void +f14 (unsigned _BitInt(4094) *p, __int128 r) +{ + p[0] /= (unsigned _BitInt(2048)) r; +} + +void +f15 (unsigned _BitInt(4096) *p, __int128 r) +{ + p[0] *= (unsigned _BitInt(2110)) r; +} + +void +f16 (unsigned _BitInt(4094) *p, __int128 r) +{ + p[0] /= (unsigned _BitInt(2110)) r; +} +#endif +#else +int i; +#endif --- gcc/testsuite/gcc.dg/bitint-57.c.jj 2023-12-20 12:42:12.691772991 +0100 +++ gcc/testsuite/gcc.dg/bitint-57.c 2023-12-20 12:42:49.900250015 +0100 @@ -0,0 +1,21 @@ +/* PR tree-optimization/112941 */ +/* { dg-do compile { target bitint } } */ +/* { dg-options "-std=c23 -O1 -fno-tree-forwprop" } */ + +#if __BITINT_MAXWIDTH__ >= 6384 +unsigned _BitInt(2049) +foo (unsigned _BitInt(6384) x, _BitInt(8) y) +{ + unsigned _BitInt(6384) z = y; + return x * z; +} + +_BitInt(2049) +bar (unsigned _BitInt(6384) x, _BitInt(1023) y) +{ + unsigned _BitInt(6384) z = y; + return x * z; +} +#else +int i; +#endif Jakub