On Wed, 15 Jun 2022, Jakub Jelinek wrote:

> Hi!
> 
> Both IFN_ATOMIC_BIT_TEST_AND_* and IFN_ATOMIC_*_FETCH_CMP_0 ifns
> are matched if their corresponding optab is implemented for the particular
> mode.  The fact that those optabs are implemented doesn't guarantee
> they will succeed though, they can just FAIL in their expansion.
> The expansion in that case uses expand_atomic_fetch_op as fallback, but
> as has been reported and and can be reproduced on the testcases,
> even those can fail and we didn't have any fallback after that.
> For IFN_ATOMIC_BIT_TEST_AND_* we actually have such calls.  One is
> done whenever we lost lhs of the ifn at some point in between matching
> it in tree-ssa-ccp.cc and expansion.  The following patch for that case
> just falls through and expands as if there was a lhs, creates a temporary
> for it.  For the other expand_atomic_fetch_op call in the same expander
> and for the only expand_atomic_fetch_op call in the other, this falls
> back the hard way, by constructing a CALL_EXPR to the call from which
> the ifn has been matched and expanding that.  Either it is lucky and manages
> to expand inline, or it emits a libatomic API call.

I wonder if that matching to IFN_ATOMIC_BIT_TEST_AND_* should then
happen at the time we expand the original builtin call to RTL?  Or
did the matching expose secondary optimization opportunities?

The back-and-forth is a big ugly IMHO and it would suggest we can
always match to the IFN since we can fall back to the builtin
call as expansion...?

Richard.

> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
> 
> 2022-06-15  Jakub Jelinek  <ja...@redhat.com>
> 
>       PR middle-end/105951
>       * builtins.cc (expand_ifn_atomic_bit_test_and): If
>       expand_atomic_fetch_op fails for the lhs == NULL_TREE case, fall
>       through into the optab code with gen_reg_rtx (mode) as target.
>       If second expand_atomic_fetch_op fails, construct a CALL_EXPR
>       and expand that.
>       (expand_ifn_atomic_op_fetch_cmp_0): If expand_atomic_fetch_op fails,
>       construct a CALL_EXPR and expand that.
> 
>       * gcc.target/i386/pr105951-1.c: New test.
>       * gcc.target/i386/pr105951-2.c: New test.
> 
> --- gcc/builtins.cc.jj        2022-05-16 11:14:57.499428916 +0200
> +++ gcc/builtins.cc   2022-06-14 18:30:35.564288606 +0200
> @@ -6250,15 +6250,19 @@ expand_ifn_atomic_bit_test_and (gcall *c
>  
>    if (lhs == NULL_TREE)
>      {
> -      val = expand_simple_binop (mode, ASHIFT, const1_rtx,
> -                              val, NULL_RTX, true, OPTAB_DIRECT);
> +      rtx val2 = expand_simple_binop (mode, ASHIFT, const1_rtx,
> +                                   val, NULL_RTX, true, OPTAB_DIRECT);
>        if (code == AND)
> -     val = expand_simple_unop (mode, NOT, val, NULL_RTX, true);
> -      expand_atomic_fetch_op (const0_rtx, mem, val, code, model, false);
> -      return;
> +     val2 = expand_simple_unop (mode, NOT, val2, NULL_RTX, true);
> +      if (expand_atomic_fetch_op (const0_rtx, mem, val2, code, model, false))
> +     return;
>      }
>  
> -  rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
> +  rtx target;
> +  if (lhs)
> +    target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
> +  else
> +    target = gen_reg_rtx (mode);
>    enum insn_code icode = direct_optab_handler (optab, mode);
>    gcc_assert (icode != CODE_FOR_nothing);
>    create_output_operand (&ops[0], target, mode);
> @@ -6277,6 +6281,51 @@ expand_ifn_atomic_bit_test_and (gcall *c
>      val = expand_simple_unop (mode, NOT, val, NULL_RTX, true);
>    rtx result = expand_atomic_fetch_op (gen_reg_rtx (mode), mem, val,
>                                      code, model, false);
> +  if (!result)
> +    {
> +      built_in_function fcode;
> +      switch (code)
> +     {
> +     case IOR:
> +       if (model != MEMMODEL_SYNC_SEQ_CST)
> +         fcode = BUILT_IN_ATOMIC_FETCH_OR_1;
> +       else
> +         fcode = BUILT_IN_SYNC_FETCH_AND_OR_1;
> +       break;
> +     case XOR:
> +       if (model != MEMMODEL_SYNC_SEQ_CST)
> +         fcode = BUILT_IN_ATOMIC_FETCH_XOR_1;
> +       else
> +         fcode = BUILT_IN_SYNC_FETCH_AND_XOR_1;
> +       break;
> +     case AND:
> +       if (model != MEMMODEL_SYNC_SEQ_CST)
> +         fcode = BUILT_IN_ATOMIC_FETCH_AND_1;
> +       else
> +         fcode = BUILT_IN_SYNC_FETCH_AND_AND_1;
> +       break;
> +     default:
> +       gcc_unreachable ();
> +     }
> +      unsigned int bytes_log2
> +     = exact_log2 (GET_MODE_SIZE (mode).to_constant ());
> +      gcc_assert (bytes_log2 < 5);
> +      fcode = (built_in_function) (fcode + bytes_log2);
> +      tree fndecl = builtin_decl_explicit (fcode);
> +      tree type = TREE_TYPE (TREE_TYPE (fndecl));
> +      tree tcall = build1 (ADDR_EXPR, build_pointer_type (TREE_TYPE 
> (fndecl)),
> +                        fndecl);
> +      tree exp = build_call_nary (type, tcall,
> +                               2 + (model != MEMMODEL_SYNC_SEQ_CST), ptr,
> +                               make_tree (type, val),
> +                               model != MEMMODEL_SYNC_SEQ_CST
> +                               ? gimple_call_arg (call, 3)
> +                               : integer_zero_node);
> +      result = expand_builtin (exp, gen_reg_rtx (mode), NULL_RTX,
> +                            mode, !lhs);
> +    }
> +  if (!lhs)
> +    return;
>    if (integer_onep (flag))
>      {
>        result = expand_simple_binop (mode, ASHIFTRT, result, bitval,
> @@ -6369,6 +6418,62 @@ expand_ifn_atomic_op_fetch_cmp_0 (gcall
>  
>    rtx result = expand_atomic_fetch_op (gen_reg_rtx (mode), mem, op,
>                                      code, model, true);
> +  if (!result)
> +    {
> +      built_in_function fcode;
> +      switch (code)
> +     {
> +     case PLUS:
> +       if (model != MEMMODEL_SYNC_SEQ_CST)
> +         fcode = BUILT_IN_ATOMIC_ADD_FETCH_1;
> +       else
> +         fcode = BUILT_IN_SYNC_ADD_AND_FETCH_1;
> +       break;
> +     case MINUS:
> +       if (model != MEMMODEL_SYNC_SEQ_CST)
> +         fcode = BUILT_IN_ATOMIC_SUB_FETCH_1;
> +       else
> +         fcode = BUILT_IN_SYNC_SUB_AND_FETCH_1;
> +       break;
> +     case IOR:
> +       if (model != MEMMODEL_SYNC_SEQ_CST)
> +         fcode = BUILT_IN_ATOMIC_OR_FETCH_1;
> +       else
> +         fcode = BUILT_IN_SYNC_OR_AND_FETCH_1;
> +       break;
> +     case XOR:
> +       if (model != MEMMODEL_SYNC_SEQ_CST)
> +         fcode = BUILT_IN_ATOMIC_XOR_FETCH_1;
> +       else
> +         fcode = BUILT_IN_SYNC_XOR_AND_FETCH_1;
> +       break;
> +     case AND:
> +       if (model != MEMMODEL_SYNC_SEQ_CST)
> +         fcode = BUILT_IN_ATOMIC_AND_FETCH_1;
> +       else
> +         fcode = BUILT_IN_SYNC_AND_AND_FETCH_1;
> +       break;
> +     default:
> +       gcc_unreachable ();
> +     }
> +      unsigned int bytes_log2
> +     = exact_log2 (GET_MODE_SIZE (mode).to_constant ());
> +      gcc_assert (bytes_log2 < 5);
> +      fcode = (built_in_function) (fcode + bytes_log2);
> +      tree fndecl = builtin_decl_explicit (fcode);
> +      tree type = TREE_TYPE (TREE_TYPE (fndecl));
> +      tree tcall = build1 (ADDR_EXPR, build_pointer_type (TREE_TYPE 
> (fndecl)),
> +                        fndecl);
> +      tree exp = build_call_nary (type, tcall,
> +                               2 + (model != MEMMODEL_SYNC_SEQ_CST), ptr,
> +                               arg,
> +                               model != MEMMODEL_SYNC_SEQ_CST
> +                               ? gimple_call_arg (call, 3)
> +                               : integer_zero_node);
> +      result = expand_builtin (exp, gen_reg_rtx (mode), NULL_RTX,
> +                            mode, !lhs);
> +    }
> +
>    if (lhs)
>      {
>        result = emit_store_flag_force (target, comp, result, const0_rtx, mode,
> --- gcc/testsuite/gcc.target/i386/pr105951-1.c.jj     2022-06-14 
> 19:06:09.452825662 +0200
> +++ gcc/testsuite/gcc.target/i386/pr105951-1.c        2022-06-14 
> 19:05:59.630924070 +0200
> @@ -0,0 +1,5 @@
> +/* PR middle-end/105951 */
> +/* { dg-do compile { target ia32 } } */
> +/* { dg-options "-O2 -march=i386" } */
> +
> +#include "pr98737-2.c"
> --- gcc/testsuite/gcc.target/i386/pr105951-2.c.jj     2022-06-14 
> 19:06:16.607753976 +0200
> +++ gcc/testsuite/gcc.target/i386/pr105951-2.c        2022-06-14 
> 19:06:22.394695990 +0200
> @@ -0,0 +1,5 @@
> +/* PR middle-end/105951 */
> +/* { dg-do compile { target ia32 } } */
> +/* { dg-options "-O2 -march=i386" } */
> +
> +#include "pr98737-4.c"
> 
>       Jakub
> 
> 

-- 
Richard Biener <rguent...@suse.de>
SUSE Software Solutions Germany GmbH, Frankenstraße 146, 90461 Nuernberg,
Germany; GF: Ivo Totev, Andrew Myers, Andrew McDonald, Boudien Moerman;
HRB 36809 (AG Nuernberg)

Reply via email to