Hi!

Both IFN_ATOMIC_BIT_TEST_AND_* and IFN_ATOMIC_*_FETCH_CMP_0 ifns
are matched if their corresponding optab is implemented for the particular
mode.  The fact that those optabs are implemented doesn't guarantee
they will succeed though, they can just FAIL in their expansion.
The expansion in that case uses expand_atomic_fetch_op as fallback, but
as has been reported and can be reproduced on the testcases,
even those can fail and we didn't have any fallback after that.
For IFN_ATOMIC_BIT_TEST_AND_* we actually have such calls.  One is
done whenever we lost lhs of the ifn at some point in between matching
it in tree-ssa-ccp.cc and expansion.  The following patch for that case
just falls through and expands as if there was a lhs, creates a temporary
for it.  For the other expand_atomic_fetch_op call in the same expander
and for the only expand_atomic_fetch_op call in the other, this falls
back the hard way, by constructing a CALL_EXPR to the call from which
the ifn has been matched and expanding that.  Either it is lucky and manages
to expand inline, or it emits a libatomic API call.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2022-06-15  Jakub Jelinek  <ja...@redhat.com>

        PR middle-end/105951
        * builtins.cc (expand_ifn_atomic_bit_test_and): If
        expand_atomic_fetch_op fails for the lhs == NULL_TREE case, fall
        through into the optab code with gen_reg_rtx (mode) as target.
        If second expand_atomic_fetch_op fails, construct a CALL_EXPR
        and expand that.
        (expand_ifn_atomic_op_fetch_cmp_0): If expand_atomic_fetch_op fails,
        construct a CALL_EXPR and expand that.

        * gcc.target/i386/pr105951-1.c: New test.
        * gcc.target/i386/pr105951-2.c: New test.

--- gcc/builtins.cc.jj  2022-05-16 11:14:57.499428916 +0200
+++ gcc/builtins.cc     2022-06-14 18:30:35.564288606 +0200
@@ -6250,15 +6250,19 @@ expand_ifn_atomic_bit_test_and (gcall *c
 
   if (lhs == NULL_TREE)
     {
-      val = expand_simple_binop (mode, ASHIFT, const1_rtx,
-                                val, NULL_RTX, true, OPTAB_DIRECT);
+      rtx val2 = expand_simple_binop (mode, ASHIFT, const1_rtx,
+                                     val, NULL_RTX, true, OPTAB_DIRECT);
       if (code == AND)
-       val = expand_simple_unop (mode, NOT, val, NULL_RTX, true);
-      expand_atomic_fetch_op (const0_rtx, mem, val, code, model, false);
-      return;
+       val2 = expand_simple_unop (mode, NOT, val2, NULL_RTX, true);
+      if (expand_atomic_fetch_op (const0_rtx, mem, val2, code, model, false))
+       return;
     }
 
-  rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
+  rtx target;
+  if (lhs)
+    target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
+  else
+    target = gen_reg_rtx (mode);
   enum insn_code icode = direct_optab_handler (optab, mode);
   gcc_assert (icode != CODE_FOR_nothing);
   create_output_operand (&ops[0], target, mode);
@@ -6277,6 +6281,51 @@ expand_ifn_atomic_bit_test_and (gcall *c
     val = expand_simple_unop (mode, NOT, val, NULL_RTX, true);
   rtx result = expand_atomic_fetch_op (gen_reg_rtx (mode), mem, val,
                                       code, model, false);
+  if (!result)
+    {
+      built_in_function fcode;
+      switch (code)
+       {
+       case IOR:
+         if (model != MEMMODEL_SYNC_SEQ_CST)
+           fcode = BUILT_IN_ATOMIC_FETCH_OR_1;
+         else
+           fcode = BUILT_IN_SYNC_FETCH_AND_OR_1;
+         break;
+       case XOR:
+         if (model != MEMMODEL_SYNC_SEQ_CST)
+           fcode = BUILT_IN_ATOMIC_FETCH_XOR_1;
+         else
+           fcode = BUILT_IN_SYNC_FETCH_AND_XOR_1;
+         break;
+       case AND:
+         if (model != MEMMODEL_SYNC_SEQ_CST)
+           fcode = BUILT_IN_ATOMIC_FETCH_AND_1;
+         else
+           fcode = BUILT_IN_SYNC_FETCH_AND_AND_1;
+         break;
+       default:
+         gcc_unreachable ();
+       }
+      unsigned int bytes_log2
+       = exact_log2 (GET_MODE_SIZE (mode).to_constant ());
+      gcc_assert (bytes_log2 < 5);
+      fcode = (built_in_function) (fcode + bytes_log2);
+      tree fndecl = builtin_decl_explicit (fcode);
+      tree type = TREE_TYPE (TREE_TYPE (fndecl));
+      tree tcall = build1 (ADDR_EXPR, build_pointer_type (TREE_TYPE (fndecl)),
+                          fndecl);
+      tree exp = build_call_nary (type, tcall,
+                                 2 + (model != MEMMODEL_SYNC_SEQ_CST), ptr,
+                                 make_tree (type, val),
+                                 model != MEMMODEL_SYNC_SEQ_CST
+                                 ? gimple_call_arg (call, 3)
+                                 : integer_zero_node);
+      result = expand_builtin (exp, gen_reg_rtx (mode), NULL_RTX,
+                              mode, !lhs);
+    }
+  if (!lhs)
+    return;
   if (integer_onep (flag))
     {
       result = expand_simple_binop (mode, ASHIFTRT, result, bitval,
@@ -6369,6 +6418,62 @@ expand_ifn_atomic_op_fetch_cmp_0 (gcall
 
   rtx result = expand_atomic_fetch_op (gen_reg_rtx (mode), mem, op,
                                       code, model, true);
+  if (!result)
+    {
+      built_in_function fcode;
+      switch (code)
+       {
+       case PLUS:
+         if (model != MEMMODEL_SYNC_SEQ_CST)
+           fcode = BUILT_IN_ATOMIC_ADD_FETCH_1;
+         else
+           fcode = BUILT_IN_SYNC_ADD_AND_FETCH_1;
+         break;
+       case MINUS:
+         if (model != MEMMODEL_SYNC_SEQ_CST)
+           fcode = BUILT_IN_ATOMIC_SUB_FETCH_1;
+         else
+           fcode = BUILT_IN_SYNC_SUB_AND_FETCH_1;
+         break;
+       case IOR:
+         if (model != MEMMODEL_SYNC_SEQ_CST)
+           fcode = BUILT_IN_ATOMIC_OR_FETCH_1;
+         else
+           fcode = BUILT_IN_SYNC_OR_AND_FETCH_1;
+         break;
+       case XOR:
+         if (model != MEMMODEL_SYNC_SEQ_CST)
+           fcode = BUILT_IN_ATOMIC_XOR_FETCH_1;
+         else
+           fcode = BUILT_IN_SYNC_XOR_AND_FETCH_1;
+         break;
+       case AND:
+         if (model != MEMMODEL_SYNC_SEQ_CST)
+           fcode = BUILT_IN_ATOMIC_AND_FETCH_1;
+         else
+           fcode = BUILT_IN_SYNC_AND_AND_FETCH_1;
+         break;
+       default:
+         gcc_unreachable ();
+       }
+      unsigned int bytes_log2
+       = exact_log2 (GET_MODE_SIZE (mode).to_constant ());
+      gcc_assert (bytes_log2 < 5);
+      fcode = (built_in_function) (fcode + bytes_log2);
+      tree fndecl = builtin_decl_explicit (fcode);
+      tree type = TREE_TYPE (TREE_TYPE (fndecl));
+      tree tcall = build1 (ADDR_EXPR, build_pointer_type (TREE_TYPE (fndecl)),
+                          fndecl);
+      tree exp = build_call_nary (type, tcall,
+                                 2 + (model != MEMMODEL_SYNC_SEQ_CST), ptr,
+                                 arg,
+                                 model != MEMMODEL_SYNC_SEQ_CST
+                                 ? gimple_call_arg (call, 3)
+                                 : integer_zero_node);
+      result = expand_builtin (exp, gen_reg_rtx (mode), NULL_RTX,
+                              mode, !lhs);
+    }
+
   if (lhs)
     {
       result = emit_store_flag_force (target, comp, result, const0_rtx, mode,
--- gcc/testsuite/gcc.target/i386/pr105951-1.c.jj       2022-06-14 19:06:09.452825662 +0200
+++ gcc/testsuite/gcc.target/i386/pr105951-1.c  2022-06-14 19:05:59.630924070 +0200
@@ -0,0 +1,5 @@
+/* PR middle-end/105951 */
+/* { dg-do compile { target ia32 } } */
+/* { dg-options "-O2 -march=i386" } */
+
+#include "pr98737-2.c"
--- gcc/testsuite/gcc.target/i386/pr105951-2.c.jj       2022-06-14 19:06:16.607753976 +0200
+++ gcc/testsuite/gcc.target/i386/pr105951-2.c  2022-06-14 19:06:22.394695990 +0200
@@ -0,0 +1,5 @@
+/* PR middle-end/105951 */
+/* { dg-do compile { target ia32 } } */
+/* { dg-options "-O2 -march=i386" } */
+
+#include "pr98737-4.c"

        Jakub

Reply via email to