Hi!

The following patch fixes 2 issues in handling of casts for mergeable
stmts.
The first hunk fixes the case where we have two nested casts (after
optimization this is typically a zero-extension of a sign-extension,
because everything else should have been folded into a single cast).  If
the lowering of the outer cast needs to make the code conditional
(e.g.
  for (...)
    {
      if (idx <= 32)
        {
          if (idx < 32)
            { ... handle_operand (idx); ... }
          else
            { ... handle_operand (32); ... }
        }
      ...
    }
) and the lowering of the inner one does as well, right now it creates
invalid SSA form, because even for the inner cast we need a PHI on the loop
and the PHI argument from the latch edge is an SSA_NAME initialized in
the conditionally executed bb.  The hunk fixes that by detecting such
a case and adding further PHI nodes at the end of the ifs such that
the right value propagates to the next loop iteration.  We can use
zero-valued PHI arguments for the other edges because the inner operand
handling is only done for the first set of iterations and then the other
ifs take over.

The rest fixes another case of invalid SSA form.  When, for a sign
extension, we need to use the 0 or -1 value initialized by an earlier
iteration in a constant idx case, the code was using the loop PHI's
argument from the latch edge rather than its result.  That is correct for
cases expanded in straight-line code after the loop, but not inside the
loop for the handle_cast conditionals; there we should use the PHI result.
This is done in the second hunk and supported by the remaining hunks,
which clear m_bb to tell the code we aren't in the loop anymore.

Note, this patch doesn't deal with similar problems during multiplication,
division, floating casts etc. where we just emit a library call.  I'll
need to make sure in that case we don't merge more than one cast per
operand.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2023-12-20  Jakub Jelinek  <ja...@redhat.com>

        PR tree-optimization/112941
        * gimple-lower-bitint.cc (bitint_large_huge::handle_cast): If
        save_cast_conditional, instead of adding assignment of t4 to
        m_data[save_data_cnt + 1] before m_gsi, add phi nodes such that
        t4 propagates to m_bb loop.  For constant idx, use
        m_data[save_data_cnt] rather than m_data[save_data_cnt + 1] if inside
        of the m_bb loop.
        (bitint_large_huge::lower_mergeable_stmt): Clear m_bb when no longer
        expanding inside of that loop.
        (bitint_large_huge::lower_comparison_stmt): Likewise.
        (bitint_large_huge::lower_addsub_overflow): Likewise.
        (bitint_large_huge::lower_mul_overflow): Likewise.
        (bitint_large_huge::lower_bit_query): Likewise.

        * gcc.dg/bitint-55.c: New test.

--- gcc/gimple-lower-bitint.cc.jj       2023-12-15 10:10:51.000000000 +0100
+++ gcc/gimple-lower-bitint.cc  2023-12-19 18:32:52.937388747 +0100
@@ -1401,8 +1401,29 @@ bitint_large_huge::handle_cast (tree lhs
              add_phi_arg (phi, m_data[save_data_cnt], edge_false,
                           UNKNOWN_LOCATION);
              add_phi_arg (phi, ext, edge_true_true, UNKNOWN_LOCATION);
-             g = gimple_build_assign (m_data[save_data_cnt + 1], t4);
-             insert_before (g);
+             if (!save_cast_conditional)
+               {
+                 g = gimple_build_assign (m_data[save_data_cnt + 1], t4);
+                 insert_before (g);
+               }
+             else
+               for (basic_block bb = gsi_bb (m_gsi);;)
+                 {
+                   edge e1 = single_succ_edge (bb);
+                   edge e2 = find_edge (e1->dest, m_bb), e3;
+                   tree t5 = (e2 ? m_data[save_data_cnt + 1]
+                              : make_ssa_name (m_limb_type));
+                   phi = create_phi_node (t5, e1->dest);
+                   edge_iterator ei;
+                   FOR_EACH_EDGE (e3, ei, e1->dest->preds)
+                     add_phi_arg (phi, (e3 == e1 ? t4
+                                        : build_zero_cst (m_limb_type)),
+                                  e3, UNKNOWN_LOCATION);
+                   if (e2)
+                     break;
+                   t4 = t5;
+                   bb = e1->dest;
+                 }
            }
          if (m_bitfld_load)
            {
@@ -1470,6 +1491,8 @@ bitint_large_huge::handle_cast (tree lhs
                m_data_cnt = tree_to_uhwi (m_data[save_data_cnt + 2]);
              if (TYPE_UNSIGNED (rhs_type))
                t = build_zero_cst (m_limb_type);
+             else if (m_bb)
+               t = m_data[save_data_cnt];
              else
                t = m_data[save_data_cnt + 1];
            }
@@ -2586,6 +2609,7 @@ bitint_large_huge::lower_mergeable_stmt
                m_gsi = gsi_after_labels (edge_bb);
              else
                m_gsi = gsi_for_stmt (stmt);
+             m_bb = NULL;
            }
        }
     }
@@ -2712,6 +2736,7 @@ bitint_large_huge::lower_mergeable_stmt
                                     NULL_TREE, NULL_TREE);
              insert_before (g);
              m_gsi = gsi_for_stmt (stmt);
+             m_bb = NULL;
            }
        }
     }
@@ -2890,6 +2915,7 @@ bitint_large_huge::lower_comparison_stmt
          extract_true_false_edges_from_block (gsi_bb (m_gsi),
                                               &true_edge, &false_edge);
          m_gsi = gsi_after_labels (false_edge->dest);
+         m_bb = NULL;
        }
     }
 
@@ -4208,6 +4234,7 @@ bitint_large_huge::lower_addsub_overflow
                                     NULL_TREE, NULL_TREE);
              insert_before (g);
              m_gsi = gsi_for_stmt (final_stmt);
+             m_bb = NULL;
            }
        }
     }
@@ -4405,6 +4432,7 @@ bitint_large_huge::lower_mul_overflow (t
                                                       &true_edge,
                                                       &false_edge);
                  m_gsi = gsi_after_labels (false_edge->dest);
+                 m_bb = NULL;
                }
            }
 
@@ -4744,6 +4772,7 @@ bitint_large_huge::lower_bit_query (gimp
                    m_gsi = gsi_after_labels (edge_bb);
                  else
                    m_gsi = gsi_for_stmt (stmt);
+                 m_bb = NULL;
                }
            }
        }
@@ -4905,6 +4934,7 @@ bitint_large_huge::lower_bit_query (gimp
              extract_true_false_edges_from_block (gsi_bb (m_gsi),
                                                   &true_edge, &false_edge);
              m_gsi = gsi_after_labels (false_edge->dest);
+             m_bb = NULL;
            }
        }
     }
--- gcc/testsuite/gcc.dg/bitint-55.c.jj 2023-12-19 23:37:41.161537400 +0100
+++ gcc/testsuite/gcc.dg/bitint-55.c    2023-12-19 23:37:08.886986817 +0100
@@ -0,0 +1,129 @@
+/* PR tree-optimization/112941 */
+/* { dg-do compile { target bitint } } */
+/* { dg-options "-std=c23 -O2" } */
+
+#if __BITINT_MAXWIDTH__ >= 4096
+void
+f1 (_BitInt(4096) *p, int r, _BitInt(115) s, _BitInt(128) t, _BitInt(231) u)
+{
+  p[0] += (unsigned _BitInt(2048)) r;
+  p[1] += (unsigned _BitInt(2048)) s;
+  p[2] += (unsigned _BitInt(2048)) t;
+  p[3] += (unsigned _BitInt(2048)) u;
+}
+
+void
+f2 (_BitInt(4094) *p, int r, _BitInt(115) s, _BitInt(128) t, _BitInt(231) u)
+{
+  p[0] -= (unsigned _BitInt(2048)) r;
+  p[1] -= (unsigned _BitInt(2048)) s;
+  p[2] -= (unsigned _BitInt(2048)) t;
+  p[3] -= (unsigned _BitInt(2048)) u;
+}
+
+void
+f3 (_BitInt(4096) *p, int r, _BitInt(115) s, _BitInt(128) t, _BitInt(231) u)
+{
+  p[0] += (unsigned _BitInt(2110)) r;
+  p[1] += (unsigned _BitInt(2110)) s;
+  p[2] += (unsigned _BitInt(2110)) t;
+  p[3] += (unsigned _BitInt(2110)) u;
+}
+
+void
+f4 (_BitInt(4094) *p, int r, _BitInt(115) s, _BitInt(128) t, _BitInt(231) u)
+{
+  p[0] -= (unsigned _BitInt(2110)) r;
+  p[1] -= (unsigned _BitInt(2110)) s;
+  p[2] -= (unsigned _BitInt(2110)) t;
+  p[3] -= (unsigned _BitInt(2110)) u;
+}
+
+void
+f5 (unsigned _BitInt(4096) *p, int r, _BitInt(115) s, _BitInt(128) t, _BitInt(231) u)
+{
+  p[0] += (unsigned _BitInt(2048)) r;
+  p[1] += (unsigned _BitInt(2048)) s;
+  p[2] += (unsigned _BitInt(2048)) t;
+  p[3] += (unsigned _BitInt(2048)) u;
+}
+
+void
+f6 (unsigned _BitInt(4094) *p, int r, _BitInt(115) s, _BitInt(128) t, _BitInt(231) u)
+{
+  p[0] -= (unsigned _BitInt(2048)) r;
+  p[1] -= (unsigned _BitInt(2048)) s;
+  p[2] -= (unsigned _BitInt(2048)) t;
+  p[3] -= (unsigned _BitInt(2048)) u;
+}
+
+void
+f7 (unsigned _BitInt(4096) *p, int r, _BitInt(115) s, _BitInt(128) t, _BitInt(231) u)
+{
+  p[0] += (unsigned _BitInt(2110)) r;
+  p[1] += (unsigned _BitInt(2110)) s;
+  p[2] += (unsigned _BitInt(2110)) t;
+  p[3] += (unsigned _BitInt(2110)) u;
+}
+
+void
+f8 (unsigned _BitInt(4094) *p, int r, _BitInt(115) s, _BitInt(128) t, _BitInt(231) u)
+{
+  p[0] -= (unsigned _BitInt(2110)) r;
+  p[1] -= (unsigned _BitInt(2110)) s;
+  p[2] -= (unsigned _BitInt(2110)) t;
+  p[3] -= (unsigned _BitInt(2110)) u;
+}
+
+#if __SIZEOF_INT128__
+void
+f9 (_BitInt(4096) *p, __int128 r)
+{
+  p[0] += (unsigned _BitInt(2048)) r;
+}
+
+void
+f10 (_BitInt(4094) *p, __int128 r)
+{
+  p[0] -= (unsigned _BitInt(2048)) r;
+}
+
+void
+f11 (_BitInt(4096) *p, __int128 r)
+{
+  p[0] += (unsigned _BitInt(2110)) r;
+}
+
+void
+f12 (_BitInt(4094) *p, __int128 r)
+{
+  p[0] -= (unsigned _BitInt(2110)) r;
+}
+
+void
+f13 (unsigned _BitInt(4096) *p, __int128 r)
+{
+  p[0] += (unsigned _BitInt(2048)) r;
+}
+
+void
+f14 (unsigned _BitInt(4094) *p, __int128 r)
+{
+  p[0] -= (unsigned _BitInt(2048)) r;
+}
+
+void
+f15 (unsigned _BitInt(4096) *p, __int128 r)
+{
+  p[0] += (unsigned _BitInt(2110)) r;
+}
+
+void
+f16 (unsigned _BitInt(4094) *p, __int128 r)
+{
+  p[0] -= (unsigned _BitInt(2110)) r;
+}
+#endif
+#else
+int i;
+#endif

        Jakub

Reply via email to