Hi!

The lowering of .{ADD,SUB,MUL}_OVERFLOW ifns is optimized, so that in the
common cases we don't uselessly create a large _Complex _BitInt temporary
with the first (real) part being the result and the second (imag) part
just being a huge 0 or 1, although we still do that when the optimization
isn't possible.
The optimizable_arith_overflow function checks when it is possible, e.g.
whether the ifn result is used at most twice, once in a REALPART_EXPR and
once in an IMAGPART_EXPR in the same bb, etc.  For the IMAGPART_EXPR it
then checks whether it has a single use which is a cast to some integral
non-bitint type (usually bool or int etc.).  The final check is whether
that cast stmt appears after the REALPART_EXPR (the usual case); in that
case it is optimizable, otherwise it is not (because the lowering of
optimizable ifns of this kind is done at the location of the REALPART_EXPR
and the IMAGPART_EXPR cast is adjusted at that point, so otherwise its lhs
would be set after its use).
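
For concreteness, a source-level pattern that passes those checks is
essentially the bar function from the new testcase below (illustrative
sketch only; the identifiers here are made up):

  unsigned _BitInt(255) a;
  void consume (int);

  void
  example (int x, int y)
  {
    unsigned _BitInt(255) r;
    /* This becomes a .SUB_OVERFLOW ifn; its real part is only read by the
       REALPART_EXPR feeding the store to a, its imag part only by the cast
       of the flag passed to consume.  */
    _Bool ovf = __builtin_sub_overflow (y, x, &r);
    a = r;
    consume (ovf);
  }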

Now, we also have an optimization for the case where the REALPART_EXPR lhs
is used in a single stmt - a store in the same bb; in that case we don't
have to store the real part result in a temporary, it can go directly into
memory.
Except that nothing checks whether the IMAGPART_EXPR cast is before or
after the store in this case, so the following testcase ICEs because we
end up with a use before its def stmt.

In bar (the function already handled correctly before this patch) we have
  _6 = .SUB_OVERFLOW (y_4(D), x_5(D));
  _1 = REALPART_EXPR <_6>;
  _2 = IMAGPART_EXPR <_6>;
  a = _1;
  _3 = (int) _2;
  baz (_3);
before the lowering, so we can just store the limbs of the .SUB_OVERFLOW
result into the limbs of the a variable and, while doing that, compute the
value eventually stored into _3, all in place of the former a = _1; stmt.
In foo we have
  _5 = .SUB_OVERFLOW (y_3(D), x_4(D));
  _1 = REALPART_EXPR <_5>;
  _2 = IMAGPART_EXPR <_5>;
  t_6 = (int) _2;
  baz (t_6);
  a = _1;
and we can't do that, because the lowering would happen at the a = _1;
stmt and would try to set t_6 to the overflow flag at that point.  We
don't need to punt completely and mark _5 as a _Complex _BitInt VAR_DECL
in this case though; all we need is to not merge the a = _1; store with
the .SUB_OVERFLOW and REALPART_EXPR/IMAGPART_EXPR lowering.  So, add _1
to m_names and lower the first 3 stmts at the _1 = REALPART_EXPR <_5>;
location (optimizable_arith_overflow returned non-zero and the cast of
the IMAGPART_EXPR appears after it), and then a = _1; will copy from the
temporary VAR_DECL to memory.
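
Conceptually (this is just a loose, self-contained C sketch assuming a
64-bit limb representation, not GCC's actual lowering; the limb count and
names are made up), the effect for foo is something like:

  #include <stdbool.h>
  #include <string.h>

  #define LIMBS 4                        /* hypothetical limb count */
  typedef unsigned long long limb_t;

  limb_t a_mem[LIMBS];                   /* stands in for the global a */
  extern void baz (int);

  void
  foo_lowered (const limb_t y[LIMBS], const limb_t x[LIMBS])
  {
    limb_t tmp[LIMBS];                   /* temporary VAR_DECL for _5/_1 */
    bool borrow = false;
    for (int i = 0; i < LIMBS; i++)      /* limb-wise y - x tracking borrow */
      {
        limb_t d = y[i] - x[i] - borrow;
        borrow = borrow ? d >= y[i] : d > y[i];
        tmp[i] = d;
      }
    int t = borrow;                      /* the (int) cast of IMAGPART_EXPR */
    baz (t);                             /* cast use comes first ...        */
    memcpy (a_mem, tmp, sizeof tmp);     /* ... then a = _1; copies the temporary */
  }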

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk/15/14?

2025-09-10  Jakub Jelinek  <ja...@redhat.com>

        PR middle-end/121828
        * gimple-lower-bitint.cc (gimple_lower_bitint): For a REALPART_EXPR
        consumed by a store in the same bb, where the REALPART_EXPR operand
        comes from an optimizable_arith_overflow ifn, only omit adding the
        REALPART_EXPR lhs to the m_names bitmap if the cast of the
        IMAGPART_EXPR doesn't appear in between the REALPART_EXPR and the
        store.

        * gcc.dg/bitint-126.c: New test.

--- gcc/gimple-lower-bitint.cc.jj       2025-09-04 10:45:14.704133177 +0200
+++ gcc/gimple-lower-bitint.cc  2025-09-09 19:33:33.801286116 +0200
@@ -7232,15 +7232,62 @@ gimple_lower_bitint (void)
              if (is_gimple_assign (SSA_NAME_DEF_STMT (s)))
                switch (gimple_assign_rhs_code (SSA_NAME_DEF_STMT (s)))
                  {
+                 case REALPART_EXPR:
                  case IMAGPART_EXPR:
                    {
-                     tree rhs1 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (s));
+                     gimple *ds = SSA_NAME_DEF_STMT (s);
+                     tree rhs1 = gimple_assign_rhs1 (ds);
                      rhs1 = TREE_OPERAND (rhs1, 0);
                      if (TREE_CODE (rhs1) == SSA_NAME)
                        {
                          gimple *g = SSA_NAME_DEF_STMT (rhs1);
                          if (optimizable_arith_overflow (g))
-                           continue;
+                           {
+                             if (gimple_assign_rhs_code (ds) == IMAGPART_EXPR)
+                               continue;
+                             if (gimple_store_p (use_stmt))
+                               {
+                                 /* Punt if the cast use of IMAGPART_EXPR stmt
+                                    appears before the store use_stmt, because
+                                    optimizable arith overflow can't be
+                                    lowered at the store location in that case.
+                                    See PR121828.  */
+                                 gimple_stmt_iterator gsi
+                                   = gsi_for_stmt (use_stmt);
+                                 unsigned int cnt = 0;
+                                 do
+                                   {
+                                     gsi_prev_nondebug (&gsi);
+                                     if (gsi_end_p (gsi))
+                                       break;
+                                     gimple *g2 = gsi_stmt (gsi);
+                                     if (g2 == ds)
+                                       break;
+                                     if (++cnt == 64)
+                                       break;
+                                     if (!gimple_assign_cast_p (g2))
+                                       continue;
+                                     tree rhs2 = gimple_assign_rhs1 (g2);
+                                     if (TREE_CODE (rhs2) != SSA_NAME)
+                                       continue;
+                                     gimple *g3 = SSA_NAME_DEF_STMT (rhs2);
+                                     if (!is_gimple_assign (g3))
+                                       continue;
+                                     if (gimple_assign_rhs_code (g3)
+                                         != IMAGPART_EXPR)
+                                       continue;
+                                     rhs2 = gimple_assign_rhs1 (g3);
+                                     rhs2 = TREE_OPERAND (rhs2, 0);
+                                     if (rhs2 != rhs1)
+                                       continue;
+                                     cnt = 64;
+                                     break;
+                                   }
+                                 while (1);
+                                 if (cnt == 64)
+                                   break;
+                               }
+                           }
                        }
                    }
                    /* FALLTHRU */
@@ -7251,7 +7298,6 @@ gimple_lower_bitint (void)
                  case EXACT_DIV_EXPR:
                  case TRUNC_MOD_EXPR:
                  case FIX_TRUNC_EXPR:
-                 case REALPART_EXPR:
                    if (gimple_store_p (use_stmt)
                        && is_gimple_assign (use_stmt)
                        && !gimple_has_volatile_ops (use_stmt)
--- gcc/testsuite/gcc.dg/bitint-126.c.jj        2025-09-09 19:40:51.622492315 +0200
+++ gcc/testsuite/gcc.dg/bitint-126.c   2025-09-09 19:43:17.238565340 +0200
@@ -0,0 +1,26 @@
+/* PR middle-end/121828 */
+/* { dg-do compile { target bitint } } */
+/* { dg-options "-std=c23 -O2" } */
+
+void baz (int);
+#if __BITINT_MAXWIDTH__ >= 255
+unsigned _BitInt(255) a;
+
+void
+foo (int x, int y)
+{
+  unsigned _BitInt(255) b;
+  int t = __builtin_sub_overflow (y, x, &b);
+  baz (t);
+  a = b;
+}
+
+void
+bar (int x, int y)
+{
+  unsigned _BitInt(255) b;
+  bool t = __builtin_sub_overflow (y, x, &b);
+  a = b;
+  baz (t);
+}
+#endif

        Jakub
