On Wed, 20 Dec 2023, Jakub Jelinek wrote:

> Hi!
> 
> The following patch fixes 2 issues in handling of casts for mergeable
> stmts.
> The first hunk fixes the case when we have two nested casts (typically
> after optimization that is zero-extension of a sign-extension because
> everything else should have been folded into a single cast).  If
> the lowering of the outer cast needs to make the code conditional
> (e.g.
>   for (...)
>     {
>       if (idx <= 32)
>       {
>         if (idx < 32)
>           { ... handle_operand (idx); ... }
>         else
>           { ... handle_operand (32); ... }
>       }
>       ...
>     }
> ) and the lowering of the inner one as well, right now it creates invalid
> SSA form, because even for the inner cast we need a PHI on the loop
> and the PHI argument from the latch edge is a SSA_NAME initialized in
> the conditionally executed bb.  The hunk fixes that by detecting such
> a case and adding further PHI nodes at the end of the ifs such that
> the right value propagates to the next loop iteration.  We can use
> 0 arguments for the other edges because the inner operand handling
> is only done for the first set of iterations and then the other ifs take
> over.
> 
> The rest fixes a case of again invalid SSA form, when for a sign extension
> we need to use the 0 or -1 value initialized by earlier iteration in
> a constant idx case, the code was using the value of the loop PHI argument
> from latch edge rather than result; that is correct for cases expanded
> in straight line code after the loop, but not inside of the loop for the
> cases of handle_cast conditionals, there we should use PHI result.  This
> is done in the second hunk and supported by the remaining hunks, where
> it clears m_bb to tell the code we aren't in the loop anymore.
> 
> Note, this patch doesn't deal with similar problems during multiplication,
> division, floating casts etc. where we just emit a library call.  I'll
> need to make sure in that case we don't merge more than one cast per
> operand.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

OK.

Richard.

> 2023-12-20  Jakub Jelinek  <ja...@redhat.com>
> 
>       PR tree-optimization/112941
>       * gimple-lower-bitint.cc (bitint_large_huge::handle_cast): If
>       save_cast_conditional, instead of adding assignment of t4 to
>       m_data[save_data_cnt + 1] before m_gsi, add phi nodes such that
>       t4 propagates to m_bb loop.  For constant idx, use
>       m_data[save_data_cnt] rather than m_data[save_data_cnt + 1] if inside
>       of the m_bb loop.
>       (bitint_large_huge::lower_mergeable_stmt): Clear m_bb when no longer
>       expanding inside of that loop.
>       (bitint_large_huge::lower_comparison_stmt): Likewise.
>       (bitint_large_huge::lower_addsub_overflow): Likewise.
>       (bitint_large_huge::lower_mul_overflow): Likewise.
>       (bitint_large_huge::lower_bit_query): Likewise.
> 
>       * gcc.dg/bitint-55.c: New test.
> 
> --- gcc/gimple-lower-bitint.cc.jj     2023-12-15 10:10:51.000000000 +0100
> +++ gcc/gimple-lower-bitint.cc        2023-12-19 18:32:52.937388747 +0100
> @@ -1401,8 +1401,29 @@ bitint_large_huge::handle_cast (tree lhs
>             add_phi_arg (phi, m_data[save_data_cnt], edge_false,
>                          UNKNOWN_LOCATION);
>             add_phi_arg (phi, ext, edge_true_true, UNKNOWN_LOCATION);
> -           g = gimple_build_assign (m_data[save_data_cnt + 1], t4);
> -           insert_before (g);
> +           if (!save_cast_conditional)
> +             {
> +               g = gimple_build_assign (m_data[save_data_cnt + 1], t4);
> +               insert_before (g);
> +             }
> +           else
> +             for (basic_block bb = gsi_bb (m_gsi);;)
> +               {
> +                 edge e1 = single_succ_edge (bb);
> +                 edge e2 = find_edge (e1->dest, m_bb), e3;
> +                 tree t5 = (e2 ? m_data[save_data_cnt + 1]
> +                            : make_ssa_name (m_limb_type));
> +                 phi = create_phi_node (t5, e1->dest);
> +                 edge_iterator ei;
> +                 FOR_EACH_EDGE (e3, ei, e1->dest->preds)
> +                   add_phi_arg (phi, (e3 == e1 ? t4
> +                                      : build_zero_cst (m_limb_type)),
> +                                e3, UNKNOWN_LOCATION);
> +                 if (e2)
> +                   break;
> +                 t4 = t5;
> +                 bb = e1->dest;
> +               }
>           }
>         if (m_bitfld_load)
>           {
> @@ -1470,6 +1491,8 @@ bitint_large_huge::handle_cast (tree lhs
>               m_data_cnt = tree_to_uhwi (m_data[save_data_cnt + 2]);
>             if (TYPE_UNSIGNED (rhs_type))
>               t = build_zero_cst (m_limb_type);
> +           else if (m_bb)
> +             t = m_data[save_data_cnt];
>             else
>               t = m_data[save_data_cnt + 1];
>           }
> @@ -2586,6 +2609,7 @@ bitint_large_huge::lower_mergeable_stmt
>               m_gsi = gsi_after_labels (edge_bb);
>             else
>               m_gsi = gsi_for_stmt (stmt);
> +           m_bb = NULL;
>           }
>       }
>      }
> @@ -2712,6 +2736,7 @@ bitint_large_huge::lower_mergeable_stmt
>                                    NULL_TREE, NULL_TREE);
>             insert_before (g);
>             m_gsi = gsi_for_stmt (stmt);
> +           m_bb = NULL;
>           }
>       }
>      }
> @@ -2890,6 +2915,7 @@ bitint_large_huge::lower_comparison_stmt
>         extract_true_false_edges_from_block (gsi_bb (m_gsi),
>                                              &true_edge, &false_edge);
>         m_gsi = gsi_after_labels (false_edge->dest);
> +       m_bb = NULL;
>       }
>      }
>  
> @@ -4208,6 +4234,7 @@ bitint_large_huge::lower_addsub_overflow
>                                    NULL_TREE, NULL_TREE);
>             insert_before (g);
>             m_gsi = gsi_for_stmt (final_stmt);
> +           m_bb = NULL;
>           }
>       }
>      }
> @@ -4405,6 +4432,7 @@ bitint_large_huge::lower_mul_overflow (t
>                                                      &true_edge,
>                                                      &false_edge);
>                 m_gsi = gsi_after_labels (false_edge->dest);
> +               m_bb = NULL;
>               }
>           }
>  
> @@ -4744,6 +4772,7 @@ bitint_large_huge::lower_bit_query (gimp
>                   m_gsi = gsi_after_labels (edge_bb);
>                 else
>                   m_gsi = gsi_for_stmt (stmt);
> +               m_bb = NULL;
>               }
>           }
>       }
> @@ -4905,6 +4934,7 @@ bitint_large_huge::lower_bit_query (gimp
>             extract_true_false_edges_from_block (gsi_bb (m_gsi),
>                                                  &true_edge, &false_edge);
>             m_gsi = gsi_after_labels (false_edge->dest);
> +           m_bb = NULL;
>           }
>       }
>      }
> --- gcc/testsuite/gcc.dg/bitint-55.c.jj       2023-12-19 23:37:41.161537400 
> +0100
> +++ gcc/testsuite/gcc.dg/bitint-55.c  2023-12-19 23:37:08.886986817 +0100
> @@ -0,0 +1,129 @@
> +/* PR tree-optimization/112941 */
> +/* { dg-do compile { target bitint } } */
> +/* { dg-options "-std=c23 -O2" } */
> +
> +#if __BITINT_MAXWIDTH__ >= 4096
> +void
> +f1 (_BitInt(4096) *p, int r, _BitInt(115) s, _BitInt(128) t, _BitInt(231) u)
> +{
> +  p[0] += (unsigned _BitInt(2048)) r;
> +  p[1] += (unsigned _BitInt(2048)) s;
> +  p[2] += (unsigned _BitInt(2048)) t;
> +  p[3] += (unsigned _BitInt(2048)) u;
> +}
> +
> +void
> +f2 (_BitInt(4094) *p, int r, _BitInt(115) s, _BitInt(128) t, _BitInt(231) u)
> +{
> +  p[0] -= (unsigned _BitInt(2048)) r;
> +  p[1] -= (unsigned _BitInt(2048)) s;
> +  p[2] -= (unsigned _BitInt(2048)) t;
> +  p[3] -= (unsigned _BitInt(2048)) u;
> +}
> +
> +void
> +f3 (_BitInt(4096) *p, int r, _BitInt(115) s, _BitInt(128) t, _BitInt(231) u)
> +{
> +  p[0] += (unsigned _BitInt(2110)) r;
> +  p[1] += (unsigned _BitInt(2110)) s;
> +  p[2] += (unsigned _BitInt(2110)) t;
> +  p[3] += (unsigned _BitInt(2110)) u;
> +}
> +
> +void
> +f4 (_BitInt(4094) *p, int r, _BitInt(115) s, _BitInt(128) t, _BitInt(231) u)
> +{
> +  p[0] -= (unsigned _BitInt(2110)) r;
> +  p[1] -= (unsigned _BitInt(2110)) s;
> +  p[2] -= (unsigned _BitInt(2110)) t;
> +  p[3] -= (unsigned _BitInt(2110)) u;
> +}
> +
> +void
> +f5 (unsigned _BitInt(4096) *p, int r, _BitInt(115) s, _BitInt(128) t, 
> _BitInt(231) u)
> +{
> +  p[0] += (unsigned _BitInt(2048)) r;
> +  p[1] += (unsigned _BitInt(2048)) s;
> +  p[2] += (unsigned _BitInt(2048)) t;
> +  p[3] += (unsigned _BitInt(2048)) u;
> +}
> +
> +void
> +f6 (unsigned _BitInt(4094) *p, int r, _BitInt(115) s, _BitInt(128) t, 
> _BitInt(231) u)
> +{
> +  p[0] -= (unsigned _BitInt(2048)) r;
> +  p[1] -= (unsigned _BitInt(2048)) s;
> +  p[2] -= (unsigned _BitInt(2048)) t;
> +  p[3] -= (unsigned _BitInt(2048)) u;
> +}
> +
> +void
> +f7 (unsigned _BitInt(4096) *p, int r, _BitInt(115) s, _BitInt(128) t, 
> _BitInt(231) u)
> +{
> +  p[0] += (unsigned _BitInt(2110)) r;
> +  p[1] += (unsigned _BitInt(2110)) s;
> +  p[2] += (unsigned _BitInt(2110)) t;
> +  p[3] += (unsigned _BitInt(2110)) u;
> +}
> +
> +void
> +f8 (unsigned _BitInt(4094) *p, int r, _BitInt(115) s, _BitInt(128) t, 
> _BitInt(231) u)
> +{
> +  p[0] -= (unsigned _BitInt(2110)) r;
> +  p[1] -= (unsigned _BitInt(2110)) s;
> +  p[2] -= (unsigned _BitInt(2110)) t;
> +  p[3] -= (unsigned _BitInt(2110)) u;
> +}
> +
> +#if __SIZEOF_INT128__
> +void
> +f9 (_BitInt(4096) *p, __int128 r)
> +{
> +  p[0] += (unsigned _BitInt(2048)) r;
> +}
> +
> +void
> +f10 (_BitInt(4094) *p, __int128 r)
> +{
> +  p[0] -= (unsigned _BitInt(2048)) r;
> +}
> +
> +void
> +f11 (_BitInt(4096) *p, __int128 r)
> +{
> +  p[0] += (unsigned _BitInt(2110)) r;
> +}
> +
> +void
> +f12 (_BitInt(4094) *p, __int128 r)
> +{
> +  p[0] -= (unsigned _BitInt(2110)) r;
> +}
> +
> +void
> +f13 (unsigned _BitInt(4096) *p, __int128 r)
> +{
> +  p[0] += (unsigned _BitInt(2048)) r;
> +}
> +
> +void
> +f14 (unsigned _BitInt(4094) *p, __int128 r)
> +{
> +  p[0] -= (unsigned _BitInt(2048)) r;
> +}
> +
> +void
> +f15 (unsigned _BitInt(4096) *p, __int128 r)
> +{
> +  p[0] += (unsigned _BitInt(2110)) r;
> +}
> +
> +void
> +f16 (unsigned _BitInt(4094) *p, __int128 r)
> +{
> +  p[0] -= (unsigned _BitInt(2110)) r;
> +}
> +#endif
> +#else
> +int i;
> +#endif
> 
>       Jakub
> 
> 

-- 
Richard Biener <rguent...@suse.de>
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)

Reply via email to