On Thu, 2 Mar 2017, Jakub Jelinek wrote:

> Hi!
>
> The __atomic/__sync builtins have nothrow conditional on
> -fnon-call-exceptions, but internal fns have right now only constant flags.
> As per discussions on IRC, this patch removes ECF_NOTHROW from those 4 ifns
> and instead calls gimple_call_set_nothrow when creating those to say
> if it can or can't throw.  Furthermore, if we need to add statements after
> the former builtin, now ifn with -fnon-call-exceptions, we need to insert
> them on the fallthru edge instead of right after it.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
>
> 2017-03-02  Jakub Jelinek  <ja...@redhat.com>
>
> 	PR middle-end/79805
> 	* internal-fn.def (ATOMIC_BIT_TEST_AND_SET, ATOMIC_BIT_TEST_AND_RESET,
> 	ATOMIC_BIT_TEST_AND_COMPLEMENT, ATOMIC_COMPARE_EXCHANGE): Remove
> 	ECF_NOTHROW.
> 	* gimple-fold.c (fold_builtin_atomic_compare_exchange): Set
> 	gimple_call_nothrow_p flag based on flag_non_call_exceptions.  For
> 	-fnon-call-exceptions emit following stmts on the fallthrough edge.
> 	* tree-ssa-ccp.c (optimize_atomic_bit_test_and): Similarly, except
> 	don't create new bb if inserting just debug stmts on the edge, try
> 	to insert them on the fallthru bb or just reset debug stmts.
>
> 	* g++.dg/opt/pr79805.C: New test.
>
> --- gcc/internal-fn.def.jj	2017-02-09 14:55:38.000000000 +0100
> +++ gcc/internal-fn.def	2017-03-02 13:14:43.659419232 +0100
> @@ -205,11 +205,13 @@ DEF_INTERNAL_FN (GOACC_TILE, ECF_NOTHROW
>     current target.  */
>  DEF_INTERNAL_FN (SET_EDOM, ECF_LEAF | ECF_NOTHROW, NULL)
>
> -/* Atomic functions.  */
> -DEF_INTERNAL_FN (ATOMIC_BIT_TEST_AND_SET, ECF_LEAF | ECF_NOTHROW, NULL)
> -DEF_INTERNAL_FN (ATOMIC_BIT_TEST_AND_COMPLEMENT, ECF_LEAF | ECF_NOTHROW, NULL)
> -DEF_INTERNAL_FN (ATOMIC_BIT_TEST_AND_RESET, ECF_LEAF | ECF_NOTHROW, NULL)
> -DEF_INTERNAL_FN (ATOMIC_COMPARE_EXCHANGE, ECF_LEAF | ECF_NOTHROW, NULL)
> +/* Atomic functions.  These don't have ECF_NOTHROW because for
> +   -fnon-call-exceptions they can throw, otherwise we set
> +   gimple_call_nothrow_p on it.  */
> +DEF_INTERNAL_FN (ATOMIC_BIT_TEST_AND_SET, ECF_LEAF, NULL)
> +DEF_INTERNAL_FN (ATOMIC_BIT_TEST_AND_COMPLEMENT, ECF_LEAF, NULL)
> +DEF_INTERNAL_FN (ATOMIC_BIT_TEST_AND_RESET, ECF_LEAF, NULL)
> +DEF_INTERNAL_FN (ATOMIC_COMPARE_EXCHANGE, ECF_LEAF, NULL)
>
>  /* To implement [[fallthrough]].  */
>  DEF_INTERNAL_FN (FALLTHROUGH, ECF_LEAF | ECF_NOTHROW, NULL)
> --- gcc/gimple-fold.c.jj	2017-02-07 16:40:45.000000000 +0100
> +++ gcc/gimple-fold.c	2017-03-02 16:04:51.304850077 +0100
> @@ -3533,6 +3533,8 @@ fold_builtin_atomic_compare_exchange (gi
>    tree itype = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (parmt)));
>    tree ctype = build_complex_type (itype);
>    tree expected = TREE_OPERAND (gimple_call_arg (stmt, 1), 0);
> +  bool may_throw = false;
> +  edge e = NULL;
>    gimple *g = gimple_build_assign (make_ssa_name (TREE_TYPE (expected)),
> 				   expected);
>    gsi_insert_before (gsi, g, GSI_SAME_STMT);
> @@ -3558,19 +3560,43 @@ fold_builtin_atomic_compare_exchange (gi
>    gimple_set_vdef (g, gimple_vdef (stmt));
>    gimple_set_vuse (g, gimple_vuse (stmt));
>    SSA_NAME_DEF_STMT (gimple_vdef (g)) = g;
> -  if (gimple_call_lhs (stmt))
> +  tree oldlhs = gimple_call_lhs (stmt);
> +  if (flag_non_call_exceptions && stmt_ends_bb_p (stmt))

I think a more appropriate check is stmt_can_throw_internal (stmt)

>      {
> -      gsi_insert_before (gsi, g, GSI_SAME_STMT);
> +      may_throw = true;

and 'throws' rather than 'may_throw'.

> +      gimple_call_set_lhs (stmt, NULL_TREE);
> +      gsi_replace (gsi, g, true);
> +      e = find_fallthru_edge (gsi_bb (*gsi)->succs);
> +    }
> +  gimple_call_set_nothrow (as_a <gcall *> (g), flag_non_call_exceptions == 0);

It should copy the nothrow state from the source(s) of the folding:
consider flag_non_call_exceptions == true in this fn but working on
inlined code from a flag_non_call_exceptions == false function.  At least
for calls in the source(s) that works; for other stmts I think we do not
have an explicit nothrow on the stmt.
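Untested sketch of what that could look like for the call case, reusing
g and stmt from the hunk above (simply inherit the nothrow flag of the
builtin call being folded instead of testing flag_non_call_exceptions
directly):

  /* Sketch only: propagate the nothrow state of the original
     __atomic_compare_exchange call to the replacement ifn call.  */
  gimple_call_set_nothrow (as_a <gcall *> (g),
			   gimple_call_nothrow_p (as_a <gcall *> (stmt)));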
> +  if (oldlhs)
> +    {
> +      if (!may_throw)
> +	gsi_insert_before (gsi, g, GSI_SAME_STMT);
>        g = gimple_build_assign (make_ssa_name (itype), IMAGPART_EXPR,
> 			       build1 (IMAGPART_EXPR, itype, lhs));
> -      gsi_insert_before (gsi, g, GSI_SAME_STMT);
> -      g = gimple_build_assign (gimple_call_lhs (stmt), NOP_EXPR,
> -			       gimple_assign_lhs (g));
> +      if (may_throw)
> +	{
> +	  gsi_insert_on_edge_immediate (e, g);
> +	  *gsi = gsi_for_stmt (g);
> +	}
> +      else
> +	gsi_insert_before (gsi, g, GSI_SAME_STMT);
> +      g = gimple_build_assign (oldlhs, NOP_EXPR, gimple_assign_lhs (g));
> +      if (may_throw)
> +	gsi_insert_after (gsi, g, GSI_NEW_STMT);
>      }
> -  gsi_replace (gsi, g, true);
> +  if (!may_throw)
> +    gsi_replace (gsi, g, true);
>    g = gimple_build_assign (make_ssa_name (itype), REALPART_EXPR,
> 			   build1 (REALPART_EXPR, itype, lhs));
> -  gsi_insert_after (gsi, g, GSI_NEW_STMT);
> +  if (may_throw && oldlhs == NULL_TREE)
> +    {
> +      gsi_insert_on_edge_immediate (e, g);
> +      *gsi = gsi_for_stmt (g);
> +    }
> +  else
> +    gsi_insert_after (gsi, g, GSI_NEW_STMT);

Rather than these dances with if (may_throw), isn't it easier to compute
an insert gsi after finding the fallthru edge, splitting the edge if it
is a critical one?  (Basically doing a less fancy
gimple_find_edge_insert_loc.)
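For illustration, an untested sketch of that idea for the throwing case;
insert_bb and insert_gsi are new names used only here, everything else
is as in the patch:

  /* Sketch only: compute one insertion point on the fallthru edge,
     splitting the edge if it is critical, and then emit all follow-up
     statements there instead of branching on may_throw each time.  */
  e = find_fallthru_edge (gsi_bb (*gsi)->succs);
  basic_block insert_bb = EDGE_CRITICAL_P (e) ? split_edge (e) : e->dest;
  gimple_stmt_iterator insert_gsi = gsi_after_labels (insert_bb);
  /* ... all follow-up statements then go through
     gsi_insert_before (&insert_gsi, ..., GSI_SAME_STMT).  */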
>    if (!useless_type_conversion_p (TREE_TYPE (expected), itype))
>      {
>        g = gimple_build_assign (make_ssa_name (TREE_TYPE (expected)),
> --- gcc/tree-ssa-ccp.c.jj	2017-01-01 12:45:37.000000000 +0100
> +++ gcc/tree-ssa-ccp.c	2017-03-02 15:59:04.392422396 +0100
> @@ -2890,9 +2890,21 @@ optimize_atomic_bit_test_and (gimple_stm
>    gimple_set_location (g, gimple_location (call));
>    gimple_set_vuse (g, gimple_vuse (call));
>    gimple_set_vdef (g, gimple_vdef (call));
> +  gimple_call_set_nothrow (as_a <gcall *> (g), flag_non_call_exceptions == 0);
>    SSA_NAME_DEF_STMT (gimple_vdef (call)) = g;
>    gimple_stmt_iterator gsi = *gsip;
>    gsi_insert_after (&gsi, g, GSI_NEW_STMT);
> +  bool maybe_throw = false;
> +  edge e = NULL;
> +  if (flag_non_call_exceptions && stmt_ends_bb_p (call))
> +    {
> +      maybe_clean_or_replace_eh_stmt (call, g);
> +      if (after || (use_bool && has_debug_uses))
> +	{
> +	  e = find_fallthru_edge (gsi_bb (gsi)->succs);
> +	  maybe_throw = true;
> +	}
> +    }
>    if (after)
>      {
>        /* The internal function returns the value of the specified bit
> @@ -2905,23 +2917,42 @@ optimize_atomic_bit_test_and (gimple_stm
> 			       use_bool ? build_int_cst (TREE_TYPE (lhs), 1)
> 			       : mask);
>        new_lhs = gimple_assign_lhs (g);
> -      gsi_insert_after (&gsi, g, GSI_NEW_STMT);
> +      if (maybe_throw)
> +	{
> +	  gsi_insert_on_edge_immediate (e, g);
> +	  gsi = gsi_for_stmt (g);
> +	}
> +      else
> +	gsi_insert_after (&gsi, g, GSI_NEW_STMT);
>      }
>    if (use_bool && has_debug_uses)
>      {
> -      tree temp = make_node (DEBUG_EXPR_DECL);
> -      DECL_ARTIFICIAL (temp) = 1;
> -      TREE_TYPE (temp) = TREE_TYPE (lhs);
> -      SET_DECL_MODE (temp, TYPE_MODE (TREE_TYPE (lhs)));
> -      tree t = build2 (LSHIFT_EXPR, TREE_TYPE (lhs), new_lhs, bit);
> -      g = gimple_build_debug_bind (temp, t, g);
> -      gsi_insert_after (&gsi, g, GSI_NEW_STMT);
> +      tree temp = NULL_TREE;
> +      if (!maybe_throw || after || single_pred_p (e->dest))
> +	{
> +	  temp = make_node (DEBUG_EXPR_DECL);
> +	  DECL_ARTIFICIAL (temp) = 1;
> +	  TREE_TYPE (temp) = TREE_TYPE (lhs);
> +	  SET_DECL_MODE (temp, TYPE_MODE (TREE_TYPE (lhs)));
> +	  tree t = build2 (LSHIFT_EXPR, TREE_TYPE (lhs), new_lhs, bit);
> +	  g = gimple_build_debug_bind (temp, t, g);
> +	  if (maybe_throw && !after)
> +	    {
> +	      gsi = gsi_after_labels (e->dest);
> +	      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
> +	    }
> +	  else
> +	    gsi_insert_after (&gsi, g, GSI_NEW_STMT);
> +	}
>        FOR_EACH_IMM_USE_STMT (g, iter, use_lhs)
> 	if (is_gimple_debug (g))
> 	  {
> 	    use_operand_p use_p;
> -	    FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
> -	      SET_USE (use_p, temp);
> +	    if (temp == NULL_TREE)
> +	      gimple_debug_bind_reset_value (g);
> +	    else
> +	      FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
> +		SET_USE (use_p, temp);
> 	    update_stmt (g);
> 	  }
>      }
> --- gcc/testsuite/g++.dg/opt/pr79805.C.jj	2017-03-02 15:27:45.095232019 +0100
> +++ gcc/testsuite/g++.dg/opt/pr79805.C	2017-03-02 15:27:19.000000000 +0100
> @@ -0,0 +1,219 @@
> +// PR middle-end/79805
> +// { dg-do compile }
> +// { dg-options "-O2 -fnon-call-exceptions" }
> +
> +struct A { A (); ~A (); };
> +
> +void bar (void);
> +
> +int
> +f0 (int *d, int f)
> +{
> +  A z;
> +  int e = __atomic_compare_exchange_n (d, &f, 1, 1, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
> +  return e;
> +}
> +
> +int
> +f1 (int *a, int bit)
> +{
> +  A z;
> +  unsigned int mask = (1u << bit);
> +  return (__sync_fetch_and_or (a, mask) & mask) != 0;
> +}
> +
> +int
> +f2 (int *a, int bit)
> +{
> +  A z;
> +  unsigned int mask = (1u << bit);
> +  unsigned int t1 = __atomic_fetch_or (a, mask, __ATOMIC_RELAXED);
> +  unsigned int t2 = t1 & mask;
> +  return t2 != 0;
> +}
> +
> +long int
> +f3 (long int *a, int bit)
> +{
> +  A z;
> +  unsigned long int mask = (1ul << bit);
> +  return (__atomic_fetch_or (a, mask, __ATOMIC_SEQ_CST) & mask) == 0;
> +}
> +
> +int
> +f4 (int *a)
> +{
> +  A z;
> +  unsigned int mask = (1u << 7);
> +  return (__sync_fetch_and_or (a, mask) & mask) != 0;
> +}
> +
> +int
> +f5 (int *a)
> +{
> +  A z;
> +  unsigned int mask = (1u << 13);
> +  return (__atomic_fetch_or (a, mask, __ATOMIC_RELAXED) & mask) != 0;
> +}
> +
> +int
> +f6 (int *a)
> +{
> +  A z;
> +  unsigned int mask = (1u << 0);
> +  return (__atomic_fetch_or (a, mask, __ATOMIC_SEQ_CST) & mask) != 0;
> +}
> +
> +void
> +f7 (int *a, int bit)
> +{
> +  A z;
> +  unsigned int mask = (1u << bit);
> +  if ((__sync_fetch_and_xor (a, mask) & mask) != 0)
> +    bar ();
> +}
> +
> +void
> +f8 (int *a, int bit)
> +{
> +  A z;
> +  unsigned int mask = (1u << bit);
> +  if ((__atomic_fetch_xor (a, mask, __ATOMIC_RELAXED) & mask) == 0)
> +    bar ();
> +}
> +
> +int
> +f9 (int *a, int bit)
> +{
> +  A z;
> +  unsigned int mask = (1u << bit);
> +  return (__atomic_fetch_xor (a, mask, __ATOMIC_SEQ_CST) & mask) != 0;
> +}
> +
> +int
> +f10 (int *a)
> +{
> +  A z;
> +  unsigned int mask = (1u << 7);
> +  return (__sync_fetch_and_xor (a, mask) & mask) != 0;
> +}
> +
> +int
> +f11 (int *a)
> +{
> +  A z;
> +  unsigned int mask = (1u << 13);
> +  return (__atomic_fetch_xor (a, mask, __ATOMIC_RELAXED) & mask) != 0;
> +}
> +
> +int
> +f12 (int *a)
> +{
> +  A z;
> +  unsigned int mask = (1u << 0);
> +  return (__atomic_fetch_xor (a, mask, __ATOMIC_SEQ_CST) & mask) != 0;
> +}
> +
> +int
> +f13 (int *a, int bit)
> +{
> +  A z;
> +  unsigned int mask = (1u << bit);
> +  return (__sync_fetch_and_and (a, ~mask) & mask) != 0;
> +}
> +
> +int
> +f14 (int *a, int bit)
> +{
> +  A z;
> +  unsigned int mask = (1u << bit);
> +  return (__atomic_fetch_and (a, ~mask, __ATOMIC_RELAXED) & mask) != 0;
> +}
> +
> +int
> +f15 (int *a, int bit)
> +{
> +  A z;
> +  unsigned int mask = (1u << bit);
> +  return (__atomic_fetch_and (a, ~mask, __ATOMIC_SEQ_CST) & mask) != 0;
> +}
> +
> +int
> +f16 (int *a)
> +{
> +  A z;
> +  unsigned int mask = (1u << 7);
> +  return (__sync_fetch_and_and (a, ~mask) & mask) != 0;
> +}
> +
> +int
> +f17 (int *a)
> +{
> +  A z;
> +  unsigned int mask = (1u << 13);
> +  return (__atomic_fetch_and (a, ~mask, __ATOMIC_RELAXED) & mask) != 0;
> +}
> +
> +int
> +f18 (int *a)
> +{
> +  A z;
> +  unsigned int mask = (1u << 0);
> +  return (__atomic_fetch_and (a, ~mask, __ATOMIC_SEQ_CST) & mask) != 0;
> +}
> +
> +unsigned long int
> +f19 (unsigned long int *a, int bit)
> +{
> +  A z;
> +  unsigned long int mask = (1ul << bit);
> +  return (__atomic_xor_fetch (a, mask, __ATOMIC_SEQ_CST) & mask) != 0;
> +}
> +
> +unsigned long int
> +f20 (unsigned long int *a)
> +{
> +  A z;
> +  unsigned long int mask = (1ul << 7);
> +  return (__atomic_xor_fetch (a, mask, __ATOMIC_SEQ_CST) & mask) == 0;
> +}
> +
> +int
> +f21 (int *a, int bit)
> +{
> +  A z;
> +  unsigned int mask = (1u << bit);
> +  return (__sync_fetch_and_or (a, mask) & mask);
> +}
> +
> +unsigned long int
> +f22 (unsigned long int *a)
> +{
> +  A z;
> +  unsigned long int mask = (1ul << 7);
> +  return (__atomic_xor_fetch (a, mask, __ATOMIC_SEQ_CST) & mask);
> +}
> +
> +unsigned long int
> +f23 (unsigned long int *a)
> +{
> +  A z;
> +  unsigned long int mask = (1ul << 7);
> +  return (__atomic_fetch_xor (a, mask, __ATOMIC_SEQ_CST) & mask);
> +}
> +
> +unsigned short int
> +f24 (unsigned short int *a)
> +{
> +  A z;
> +  unsigned short int mask = (1u << 7);
> +  return (__sync_fetch_and_or (a, mask) & mask) != 0;
> +}
> +
> +unsigned short int
> +f25 (unsigned short int *a)
> +{
> +  A z;
> +  unsigned short int mask = (1u << 7);
> +  return (__atomic_fetch_or (a, mask, __ATOMIC_SEQ_CST) & mask) != 0;
> +}
>
> 	Jakub
>

-- 
Richard Biener <rguent...@suse.de>
SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton,
HRB 21284 (AG Nuernberg)