On Wed, 15 Jun 2022, Jakub Jelinek wrote: > Hi! > > Both IFN_ATOMIC_BIT_TEST_AND_* and IFN_ATOMIC_*_FETCH_CMP_0 ifns > are matched if their corresponding optab is implemented for the particular > mode. The fact that those optabs are implemented doesn't guarantee > they will succeed though, they can just FAIL in their expansion. > The expansion in that case uses expand_atomic_fetch_op as fallback, but > as has been reported and can be reproduced on the testcases, > even those can fail and we didn't have any fallback after that. > For IFN_ATOMIC_BIT_TEST_AND_* we actually have such calls. One is > done whenever we lost lhs of the ifn at some point in between matching > it in tree-ssa-ccp.cc and expansion. The following patch for that case > just falls through and expands as if there was a lhs, creates a temporary > for it. For the other expand_atomic_fetch_op call in the same expander > and for the only expand_atomic_fetch_op call in the other, this falls > back the hard way, by constructing a CALL_EXPR to the call from which > the ifn has been matched and expanding that. Either it is lucky and manages > to expand inline, or it emits a libatomic API call.
I wonder if that matching to IFN_ATOMIC_BIT_TEST_AND_* should then happen at the time we expand the original builtin call to RTL? Or did the matching expose secondary optimization opportunities? The back-and-forth is a bit ugly IMHO and it would suggest we can always match to the IFN since we can fall back to the builtin call as expansion...? Richard. > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? > > 2022-06-15 Jakub Jelinek <ja...@redhat.com> > > PR middle-end/105951 > * builtins.cc (expand_ifn_atomic_bit_test_and): If > expand_atomic_fetch_op fails for the lhs == NULL_TREE case, fall > through into the optab code with gen_reg_rtx (mode) as target. > If second expand_atomic_fetch_op fails, construct a CALL_EXPR > and expand that. > (expand_ifn_atomic_op_fetch_cmp_0): If expand_atomic_fetch_op fails, > construct a CALL_EXPR and expand that. > > * gcc.target/i386/pr105951-1.c: New test. > * gcc.target/i386/pr105951-2.c: New test. > > --- gcc/builtins.cc.jj 2022-05-16 11:14:57.499428916 +0200 > +++ gcc/builtins.cc 2022-06-14 18:30:35.564288606 +0200 > @@ -6250,15 +6250,19 @@ expand_ifn_atomic_bit_test_and (gcall *c > > if (lhs == NULL_TREE) > { > - val = expand_simple_binop (mode, ASHIFT, const1_rtx, > - val, NULL_RTX, true, OPTAB_DIRECT); > + rtx val2 = expand_simple_binop (mode, ASHIFT, const1_rtx, > + val, NULL_RTX, true, OPTAB_DIRECT); > if (code == AND) > - val = expand_simple_unop (mode, NOT, val, NULL_RTX, true); > - expand_atomic_fetch_op (const0_rtx, mem, val, code, model, false); > - return; > + val2 = expand_simple_unop (mode, NOT, val2, NULL_RTX, true); > + if (expand_atomic_fetch_op (const0_rtx, mem, val2, code, model, false)) > + return; > } > > - rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE); > + rtx target; > + if (lhs) > + target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE); > + else > + target = gen_reg_rtx (mode); > enum insn_code icode = direct_optab_handler (optab, mode); > gcc_assert (icode != 
CODE_FOR_nothing); > create_output_operand (&ops[0], target, mode); > @@ -6277,6 +6281,51 @@ expand_ifn_atomic_bit_test_and (gcall *c > val = expand_simple_unop (mode, NOT, val, NULL_RTX, true); > rtx result = expand_atomic_fetch_op (gen_reg_rtx (mode), mem, val, > code, model, false); > + if (!result) > + { > + built_in_function fcode; > + switch (code) > + { > + case IOR: > + if (model != MEMMODEL_SYNC_SEQ_CST) > + fcode = BUILT_IN_ATOMIC_FETCH_OR_1; > + else > + fcode = BUILT_IN_SYNC_FETCH_AND_OR_1; > + break; > + case XOR: > + if (model != MEMMODEL_SYNC_SEQ_CST) > + fcode = BUILT_IN_ATOMIC_FETCH_XOR_1; > + else > + fcode = BUILT_IN_SYNC_FETCH_AND_XOR_1; > + break; > + case AND: > + if (model != MEMMODEL_SYNC_SEQ_CST) > + fcode = BUILT_IN_ATOMIC_FETCH_AND_1; > + else > + fcode = BUILT_IN_SYNC_FETCH_AND_AND_1; > + break; > + default: > + gcc_unreachable (); > + } > + unsigned int bytes_log2 > + = exact_log2 (GET_MODE_SIZE (mode).to_constant ()); > + gcc_assert (bytes_log2 < 5); > + fcode = (built_in_function) (fcode + bytes_log2); > + tree fndecl = builtin_decl_explicit (fcode); > + tree type = TREE_TYPE (TREE_TYPE (fndecl)); > + tree tcall = build1 (ADDR_EXPR, build_pointer_type (TREE_TYPE > (fndecl)), > + fndecl); > + tree exp = build_call_nary (type, tcall, > + 2 + (model != MEMMODEL_SYNC_SEQ_CST), ptr, > + make_tree (type, val), > + model != MEMMODEL_SYNC_SEQ_CST > + ? 
gimple_call_arg (call, 3) > + : integer_zero_node); > + result = expand_builtin (exp, gen_reg_rtx (mode), NULL_RTX, > + mode, !lhs); > + } > + if (!lhs) > + return; > if (integer_onep (flag)) > { > result = expand_simple_binop (mode, ASHIFTRT, result, bitval, > @@ -6369,6 +6418,62 @@ expand_ifn_atomic_op_fetch_cmp_0 (gcall > > rtx result = expand_atomic_fetch_op (gen_reg_rtx (mode), mem, op, > code, model, true); > + if (!result) > + { > + built_in_function fcode; > + switch (code) > + { > + case PLUS: > + if (model != MEMMODEL_SYNC_SEQ_CST) > + fcode = BUILT_IN_ATOMIC_ADD_FETCH_1; > + else > + fcode = BUILT_IN_SYNC_ADD_AND_FETCH_1; > + break; > + case MINUS: > + if (model != MEMMODEL_SYNC_SEQ_CST) > + fcode = BUILT_IN_ATOMIC_SUB_FETCH_1; > + else > + fcode = BUILT_IN_SYNC_SUB_AND_FETCH_1; > + break; > + case IOR: > + if (model != MEMMODEL_SYNC_SEQ_CST) > + fcode = BUILT_IN_ATOMIC_OR_FETCH_1; > + else > + fcode = BUILT_IN_SYNC_OR_AND_FETCH_1; > + break; > + case XOR: > + if (model != MEMMODEL_SYNC_SEQ_CST) > + fcode = BUILT_IN_ATOMIC_XOR_FETCH_1; > + else > + fcode = BUILT_IN_SYNC_XOR_AND_FETCH_1; > + break; > + case AND: > + if (model != MEMMODEL_SYNC_SEQ_CST) > + fcode = BUILT_IN_ATOMIC_AND_FETCH_1; > + else > + fcode = BUILT_IN_SYNC_AND_AND_FETCH_1; > + break; > + default: > + gcc_unreachable (); > + } > + unsigned int bytes_log2 > + = exact_log2 (GET_MODE_SIZE (mode).to_constant ()); > + gcc_assert (bytes_log2 < 5); > + fcode = (built_in_function) (fcode + bytes_log2); > + tree fndecl = builtin_decl_explicit (fcode); > + tree type = TREE_TYPE (TREE_TYPE (fndecl)); > + tree tcall = build1 (ADDR_EXPR, build_pointer_type (TREE_TYPE > (fndecl)), > + fndecl); > + tree exp = build_call_nary (type, tcall, > + 2 + (model != MEMMODEL_SYNC_SEQ_CST), ptr, > + arg, > + model != MEMMODEL_SYNC_SEQ_CST > + ? 
gimple_call_arg (call, 3) > + : integer_zero_node); > + result = expand_builtin (exp, gen_reg_rtx (mode), NULL_RTX, > + mode, !lhs); > + } > + > if (lhs) > { > result = emit_store_flag_force (target, comp, result, const0_rtx, mode, > --- gcc/testsuite/gcc.target/i386/pr105951-1.c.jj 2022-06-14 > 19:06:09.452825662 +0200 > +++ gcc/testsuite/gcc.target/i386/pr105951-1.c 2022-06-14 > 19:05:59.630924070 +0200 > @@ -0,0 +1,5 @@ > +/* PR middle-end/105951 */ > +/* { dg-do compile { target ia32 } } */ > +/* { dg-options "-O2 -march=i386" } */ > + > +#include "pr98737-2.c" > --- gcc/testsuite/gcc.target/i386/pr105951-2.c.jj 2022-06-14 > 19:06:16.607753976 +0200 > +++ gcc/testsuite/gcc.target/i386/pr105951-2.c 2022-06-14 > 19:06:22.394695990 +0200 > @@ -0,0 +1,5 @@ > +/* PR middle-end/105951 */ > +/* { dg-do compile { target ia32 } } */ > +/* { dg-options "-O2 -march=i386" } */ > + > +#include "pr98737-4.c" > > Jakub > > -- Richard Biener <rguent...@suse.de> SUSE Software Solutions Germany GmbH, Frankenstraße 146, 90461 Nuernberg, Germany; GF: Ivo Totev, Andrew Myers, Andrew McDonald, Boudien Moerman; HRB 36809 (AG Nuernberg)