On Wed, 20 Dec 2023, Jakub Jelinek wrote:

> Hi!
>
> The following patch fixes two issues in the handling of casts for
> mergeable stmts.
> The first hunk fixes the case when we have two nested casts (typically,
> after optimization, a zero-extension of a sign-extension, because
> everything else should have been folded into a single cast).  If
> the lowering of the outer cast needs to make the code conditional, e.g.
>
>   for (...)
>     {
>       if (idx <= 32)
>         {
>           if (idx < 32)
>             { ... handle_operand (idx); ... }
>           else
>             { ... handle_operand (32); ... }
>         }
>       ...
>     }
>
> and the lowering of the inner one does as well, right now it creates
> invalid SSA form, because even for the inner cast we need a PHI on the
> loop, and the PHI argument from the latch edge is an SSA_NAME
> initialized in the conditionally executed bb.  The hunk fixes that by
> detecting such a case and adding further PHI nodes at the end of the
> ifs such that the right value propagates to the next loop iteration.
> We can use 0 arguments for the other edges because the inner operand
> handling is only done for the first set of iterations and then the
> other ifs take over.
>
> The rest fixes another case of invalid SSA form: when, for a sign
> extension, we need to use the 0 or -1 value initialized by an earlier
> iteration in a constant idx case, the code was using the value of the
> loop PHI argument from the latch edge rather than the PHI result.
> That is correct for cases expanded in straight-line code after the
> loop, but not inside of the loop for the handle_cast conditionals;
> there we should use the PHI result.  This is done in the second hunk
> and supported by the remaining hunks, which clear m_bb to tell the
> code we aren't in the loop anymore.
>
> Note, this patch doesn't deal with similar problems during
> multiplication, division, floating casts etc. where we just emit a
> library call.  I'll need to make sure in that case we don't merge more
> than one cast per operand.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
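For concreteness, the shape of source being described is visible in f1
of the new testcase: the cast of the int argument to unsigned
_BitInt(2048) is a sign-extension, and the _BitInt(4096) addition then
zero-extends that result, so after folding the lowering sees exactly
this nested-cast pattern on a mergeable stmt.  A minimal sketch,
distilled from the testcase below (keeping only the first parameter and
statement):

  void
  f1 (_BitInt(4096) *p, int r)
  {
    /* int -> unsigned _BitInt(2048) sign-extends r; the += in
       _BitInt(4096) then zero-extends the 2048-bit value, i.e. a
       zero-extension of a sign-extension.  */
    p[0] += (unsigned _BitInt(2048)) r;
  }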
OK.

Richard.

> 2023-12-20  Jakub Jelinek  <ja...@redhat.com>
>
>         PR tree-optimization/112941
>         * gimple-lower-bitint.cc (bitint_large_huge::handle_cast): If
>         save_cast_conditional, instead of adding assignment of t4 to
>         m_data[save_data_cnt + 1] before m_gsi, add phi nodes such that
>         t4 propagates to m_bb loop.  For constant idx, use
>         m_data[save_data_cnt] rather than m_data[save_data_cnt + 1] if
>         inside of the m_bb loop.
>         (bitint_large_huge::lower_mergeable_stmt): Clear m_bb when no
>         longer expanding inside of that loop.
>         (bitint_large_huge::lower_comparison_stmt): Likewise.
>         (bitint_large_huge::lower_addsub_overflow): Likewise.
>         (bitint_large_huge::lower_mul_overflow): Likewise.
>         (bitint_large_huge::lower_bit_query): Likewise.
>
>         * gcc.dg/bitint-55.c: New test.
>
> --- gcc/gimple-lower-bitint.cc.jj       2023-12-15 10:10:51.000000000 +0100
> +++ gcc/gimple-lower-bitint.cc  2023-12-19 18:32:52.937388747 +0100
> @@ -1401,8 +1401,29 @@ bitint_large_huge::handle_cast (tree lhs
>            add_phi_arg (phi, m_data[save_data_cnt], edge_false,
>                         UNKNOWN_LOCATION);
>            add_phi_arg (phi, ext, edge_true_true, UNKNOWN_LOCATION);
> -          g = gimple_build_assign (m_data[save_data_cnt + 1], t4);
> -          insert_before (g);
> +          if (!save_cast_conditional)
> +            {
> +              g = gimple_build_assign (m_data[save_data_cnt + 1], t4);
> +              insert_before (g);
> +            }
> +          else
> +            for (basic_block bb = gsi_bb (m_gsi);;)
> +              {
> +                edge e1 = single_succ_edge (bb);
> +                edge e2 = find_edge (e1->dest, m_bb), e3;
> +                tree t5 = (e2 ? m_data[save_data_cnt + 1]
> +                           : make_ssa_name (m_limb_type));
> +                phi = create_phi_node (t5, e1->dest);
> +                edge_iterator ei;
> +                FOR_EACH_EDGE (e3, ei, e1->dest->preds)
> +                  add_phi_arg (phi, (e3 == e1 ? t4
> +                                     : build_zero_cst (m_limb_type)),
> +                               e3, UNKNOWN_LOCATION);
> +                if (e2)
> +                  break;
> +                t4 = t5;
> +                bb = e1->dest;
> +              }
>          }
>        if (m_bitfld_load)
>          {
> @@ -1470,6 +1491,8 @@ bitint_large_huge::handle_cast (tree lhs
>        m_data_cnt = tree_to_uhwi (m_data[save_data_cnt + 2]);
>        if (TYPE_UNSIGNED (rhs_type))
>          t = build_zero_cst (m_limb_type);
> +      else if (m_bb)
> +        t = m_data[save_data_cnt];
>        else
>          t = m_data[save_data_cnt + 1];
>      }
> @@ -2586,6 +2609,7 @@ bitint_large_huge::lower_mergeable_stmt
>              m_gsi = gsi_after_labels (edge_bb);
>            else
>              m_gsi = gsi_for_stmt (stmt);
> +          m_bb = NULL;
>          }
>      }
>  }
> @@ -2712,6 +2736,7 @@ bitint_large_huge::lower_mergeable_stmt
>                               NULL_TREE, NULL_TREE);
>            insert_before (g);
>            m_gsi = gsi_for_stmt (stmt);
> +          m_bb = NULL;
>          }
>      }
>  }
> @@ -2890,6 +2915,7 @@ bitint_large_huge::lower_comparison_stmt
>        extract_true_false_edges_from_block (gsi_bb (m_gsi),
>                                             &true_edge, &false_edge);
>        m_gsi = gsi_after_labels (false_edge->dest);
> +      m_bb = NULL;
>      }
>  }
>
> @@ -4208,6 +4234,7 @@ bitint_large_huge::lower_addsub_overflow
>                           NULL_TREE, NULL_TREE);
>        insert_before (g);
>        m_gsi = gsi_for_stmt (final_stmt);
> +      m_bb = NULL;
>      }
>  }
>  }
> @@ -4405,6 +4432,7 @@ bitint_large_huge::lower_mul_overflow (t
>        extract_true_false_edges_from_block (gsi_bb (m_gsi),
>                                             &true_edge,
>                                             &false_edge);
>        m_gsi = gsi_after_labels (false_edge->dest);
> +      m_bb = NULL;
>      }
>  }
> @@ -4744,6 +4772,7 @@ bitint_large_huge::lower_bit_query (gimp
>              m_gsi = gsi_after_labels (edge_bb);
>            else
>              m_gsi = gsi_for_stmt (stmt);
> +          m_bb = NULL;
>          }
>      }
>  }
> @@ -4905,6 +4934,7 @@ bitint_large_huge::lower_bit_query (gimp
>        extract_true_false_edges_from_block (gsi_bb (m_gsi),
>                                             &true_edge, &false_edge);
>        m_gsi = gsi_after_labels (false_edge->dest);
> +      m_bb = NULL;
>      }
>  }
>  }
> --- gcc/testsuite/gcc.dg/bitint-55.c.jj 2023-12-19 23:37:41.161537400 +0100
> +++ gcc/testsuite/gcc.dg/bitint-55.c    2023-12-19 23:37:08.886986817 +0100
> @@ -0,0 +1,129 @@
> +/* PR tree-optimization/112941 */
> +/* { dg-do compile { target bitint } } */
> +/* { dg-options "-std=c23 -O2" } */
> +
> +#if __BITINT_MAXWIDTH__ >= 4096
> +void
> +f1 (_BitInt(4096) *p, int r, _BitInt(115) s, _BitInt(128) t, _BitInt(231) u)
> +{
> +  p[0] += (unsigned _BitInt(2048)) r;
> +  p[1] += (unsigned _BitInt(2048)) s;
> +  p[2] += (unsigned _BitInt(2048)) t;
> +  p[3] += (unsigned _BitInt(2048)) u;
> +}
> +
> +void
> +f2 (_BitInt(4094) *p, int r, _BitInt(115) s, _BitInt(128) t, _BitInt(231) u)
> +{
> +  p[0] -= (unsigned _BitInt(2048)) r;
> +  p[1] -= (unsigned _BitInt(2048)) s;
> +  p[2] -= (unsigned _BitInt(2048)) t;
> +  p[3] -= (unsigned _BitInt(2048)) u;
> +}
> +
> +void
> +f3 (_BitInt(4096) *p, int r, _BitInt(115) s, _BitInt(128) t, _BitInt(231) u)
> +{
> +  p[0] += (unsigned _BitInt(2110)) r;
> +  p[1] += (unsigned _BitInt(2110)) s;
> +  p[2] += (unsigned _BitInt(2110)) t;
> +  p[3] += (unsigned _BitInt(2110)) u;
> +}
> +
> +void
> +f4 (_BitInt(4094) *p, int r, _BitInt(115) s, _BitInt(128) t, _BitInt(231) u)
> +{
> +  p[0] -= (unsigned _BitInt(2110)) r;
> +  p[1] -= (unsigned _BitInt(2110)) s;
> +  p[2] -= (unsigned _BitInt(2110)) t;
> +  p[3] -= (unsigned _BitInt(2110)) u;
> +}
> +
> +void
> +f5 (unsigned _BitInt(4096) *p, int r, _BitInt(115) s, _BitInt(128) t, _BitInt(231) u)
> +{
> +  p[0] += (unsigned _BitInt(2048)) r;
> +  p[1] += (unsigned _BitInt(2048)) s;
> +  p[2] += (unsigned _BitInt(2048)) t;
> +  p[3] += (unsigned _BitInt(2048)) u;
> +}
> +
> +void
> +f6 (unsigned _BitInt(4094) *p, int r, _BitInt(115) s, _BitInt(128) t, _BitInt(231) u)
> +{
> +  p[0] -= (unsigned _BitInt(2048)) r;
> +  p[1] -= (unsigned _BitInt(2048)) s;
> +  p[2] -= (unsigned _BitInt(2048)) t;
> +  p[3] -= (unsigned _BitInt(2048)) u;
> +}
> +
> +void
> +f7 (unsigned _BitInt(4096) *p, int r, _BitInt(115) s, _BitInt(128) t, _BitInt(231) u)
> +{
> +  p[0] += (unsigned _BitInt(2110)) r;
> +  p[1] += (unsigned _BitInt(2110)) s;
> +  p[2] += (unsigned _BitInt(2110)) t;
> +  p[3] += (unsigned _BitInt(2110)) u;
> +}
> +
> +void
> +f8 (unsigned _BitInt(4094) *p, int r, _BitInt(115) s, _BitInt(128) t, _BitInt(231) u)
> +{
> +  p[0] -= (unsigned _BitInt(2110)) r;
> +  p[1] -= (unsigned _BitInt(2110)) s;
> +  p[2] -= (unsigned _BitInt(2110)) t;
> +  p[3] -= (unsigned _BitInt(2110)) u;
> +}
> +
> +#if __SIZEOF_INT128__
> +void
> +f9 (_BitInt(4096) *p, __int128 r)
> +{
> +  p[0] += (unsigned _BitInt(2048)) r;
> +}
> +
> +void
> +f10 (_BitInt(4094) *p, __int128 r)
> +{
> +  p[0] -= (unsigned _BitInt(2048)) r;
> +}
> +
> +void
> +f11 (_BitInt(4096) *p, __int128 r)
> +{
> +  p[0] += (unsigned _BitInt(2110)) r;
> +}
> +
> +void
> +f12 (_BitInt(4094) *p, __int128 r)
> +{
> +  p[0] -= (unsigned _BitInt(2110)) r;
> +}
> +
> +void
> +f13 (unsigned _BitInt(4096) *p, __int128 r)
> +{
> +  p[0] += (unsigned _BitInt(2048)) r;
> +}
> +
> +void
> +f14 (unsigned _BitInt(4094) *p, __int128 r)
> +{
> +  p[0] -= (unsigned _BitInt(2048)) r;
> +}
> +
> +void
> +f15 (unsigned _BitInt(4096) *p, __int128 r)
> +{
> +  p[0] += (unsigned _BitInt(2110)) r;
> +}
> +
> +void
> +f16 (unsigned _BitInt(4094) *p, __int128 r)
> +{
> +  p[0] -= (unsigned _BitInt(2110)) r;
> +}
> +#endif
> +#else
> +int i;
> +#endif
>
>         Jakub
>

-- 
Richard Biener <rguent...@suse.de>
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)
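For reference, the new test is compile-only; assuming a standard GCC
source tree and a bitint-capable target such as x86_64-linux (the
configuration the patch was regtested on), it can be exercised directly
with the options from its dg-options line:

  gcc -std=c23 -O2 -S gcc/testsuite/gcc.dg/bitint-55.c

or run through the DejaGnu harness in the usual way:

  make check-gcc RUNTESTFLAGS="dg.exp=bitint-55.c"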