This patch adds missing pattern support for atomic_test_and_set and atomic_clear operations. It also restructures the code for atomic_test_and_set, atomic_exchange, and __sync_lock_test_and_set so that it is easier to read and tries things in a rational manner.
Bootstrapped on x86_64-unknown-linux-gnu with no new regressions. Andrew
* optabs.c (maybe_emit_atomic_exchange): New. Try to emit an atomic_exchange pattern. (maybe_emit_sync_lock_test_and_set): New. Try to emit an exchange using __sync_lock_test_and_set. (maybe_emit_compare_and_swap_exchange_loop): New. Try to emit an exchange using a compare_and_swap loop. (expand_sync_lock_test_and_set): New. Expand sync_lock_test_and_set. (expand_atomic_test_and_set): New. Expand test_and_set operation. (expand_atomic_exchange): Use new maybe_emit_* functions. (expand_atomic_store): Use new maybe_emit_* functions. * builtins.c (expand_builtin_sync_lock_test_and_set): Call expand_sync_lock_test_and_set routine. (expand_builtin_atomic_exchange): Remove parameter from call. (expand_builtin_atomic_clear): Use atomic_clear pattern if present. (expand_builtin_atomic_test_and_set): Add target and simply call expand_atomic_test_and_set. (expand_builtin): Add target to expand_builtin_atomic_test_and_set. * expr.h (expand_atomic_exchange): Remove parameter. (expand_sync_lock_test_and_set): New prototype. (expand_atomic_test_and_set, expand_atomic_clear): New prototypes. Index: optabs.c =================================================================== *** optabs.c (revision 181614) --- optabs.c (working copy) *************** expand_compare_and_swap_loop (rtx mem, r *** 7325,7341 **** } ! /* This function expands the atomic exchange operation: ! atomically store VAL in MEM and return the previous value in MEM. ! ! MEMMODEL is the memory model variant to use. ! TARGET is an optional place to stick the return value. ! USE_TEST_AND_SET indicates whether __sync_lock_test_and_set should be used ! as a fall back if the atomic_exchange pattern does not exist. */ ! ! rtx ! expand_atomic_exchange (rtx target, rtx mem, rtx val, enum memmodel model, ! bool use_test_and_set) { enum machine_mode mode = GET_MODE (mem); enum insn_code icode; --- 7325,7336 ---- } ! /* This function tries to emit an atomic_exchange instruction. VAL is written ! to *MEM using memory model MODEL. 
The previous contents of *MEM are returned, ! using TARGET if possible. */ ! ! static rtx ! maybe_emit_atomic_exchange (rtx target, rtx mem, rtx val, enum memmodel model) { enum machine_mode mode = GET_MODE (mem); enum insn_code icode; *************** expand_atomic_exchange (rtx target, rtx *** 7355,7419 **** return ops[0].value; } ! /* Legacy sync_lock_test_and_set works the same, but is only defined as an ! acquire barrier. If the pattern exists, and the memory model is stronger ! than acquire, add a release barrier before the instruction. ! The barrier is not needed if sync_lock_test_and_set doesn't exist since ! it will expand into a compare-and-swap loop. ! ! Some targets have non-compliant test_and_sets, so it would be incorrect ! to emit a test_and_set in place of an __atomic_exchange. The test_and_set ! builtin shares this expander since exchange can always replace the ! test_and_set. */ ! if (use_test_and_set) ! { ! icode = optab_handler (sync_lock_test_and_set_optab, mode); ! if (icode != CODE_FOR_nothing) ! { ! struct expand_operand ops[3]; ! rtx last_insn = get_last_insn (); ! if (model == MEMMODEL_SEQ_CST ! || model == MEMMODEL_RELEASE ! || model == MEMMODEL_ACQ_REL) ! expand_mem_thread_fence (model); ! ! create_output_operand (&ops[0], target, mode); ! create_fixed_operand (&ops[1], mem); ! /* VAL may have been promoted to a wider mode. Shrink it if so. */ ! create_convert_operand_to (&ops[2], val, mode, true); ! if (maybe_expand_insn (icode, 3, ops)) ! return ops[0].value; ! ! delete_insns_since (last_insn); ! } ! ! /* If an external test-and-set libcall is provided, use that instead of ! any external compare-and-swap that we might get from the compare-and- ! swap-loop expansion below. */ ! if (!can_compare_and_swap_p (mode, false)) ! { ! rtx libfunc = optab_libfunc (sync_lock_test_and_set_optab, mode); ! if (libfunc != NULL) ! { ! rtx addr; ! if (model == MEMMODEL_SEQ_CST ! || model == MEMMODEL_RELEASE ! || model == MEMMODEL_ACQ_REL) ! 
expand_mem_thread_fence (model); ! addr = convert_memory_address (ptr_mode, XEXP (mem, 0)); ! return emit_library_call_value (libfunc, target, LCT_NORMAL, ! mode, 2, addr, ptr_mode, ! val, mode); ! } } } ! /* Otherwise, use a compare-and-swap loop for the exchange. */ if (can_compare_and_swap_p (mode, true)) { if (!target || !register_operand (target, mode)) --- 7350,7427 ---- return ops[0].value; } ! return NULL_RTX; ! } ! /* This function tries to implement an atomic exchange operation using ! __sync_lock_test_and_set. VAL is written to *MEM using memory model MODEL. ! The previous contents of *MEM are returned, using TARGET if possible. ! Since this instructionn is an acquire barrier only, stronger memory ! models may require additional barriers to be emitted. */ ! ! static rtx ! maybe_emit_sync_lock_test_and_set (rtx target, rtx mem, rtx val, ! enum memmodel model) ! { ! enum machine_mode mode = GET_MODE (mem); ! enum insn_code icode; ! rtx last_insn = get_last_insn (); ! icode = optab_handler (sync_lock_test_and_set_optab, mode); ! /* Legacy sync_lock_test_and_set is an acquire barrier. If the pattern ! exists, and the memory model is stronger than acquire, add a release ! barrier before the instruction. */ ! if (model == MEMMODEL_SEQ_CST ! || model == MEMMODEL_RELEASE ! || model == MEMMODEL_ACQ_REL) ! expand_mem_thread_fence (model); ! if (icode != CODE_FOR_nothing) ! { ! struct expand_operand ops[3]; ! create_output_operand (&ops[0], target, mode); ! create_fixed_operand (&ops[1], mem); ! /* VAL may have been promoted to a wider mode. Shrink it if so. */ ! create_convert_operand_to (&ops[2], val, mode, true); ! if (maybe_expand_insn (icode, 3, ops)) ! return ops[0].value; ! } ! ! /* If an external test-and-set libcall is provided, use that instead of ! any external compare-and-swap that we might get from the compare-and- ! swap-loop expansion later. */ ! if (!can_compare_and_swap_p (mode, false)) ! { ! 
rtx libfunc = optab_libfunc (sync_lock_test_and_set_optab, mode); ! if (libfunc != NULL) ! { ! rtx addr; ! ! addr = convert_memory_address (ptr_mode, XEXP (mem, 0)); ! return emit_library_call_value (libfunc, target, LCT_NORMAL, ! mode, 2, addr, ptr_mode, ! val, mode); } } ! /* If the test_and_set can't be emitted, eliminate any barrier that might ! have been emitted. */ ! delete_insns_since (last_insn); ! return NULL_RTX; ! } ! ! /* This function tries to implement an atomic exchange operation using a ! compare_and_swap loop. VAL is written to *MEM. The previous contents of ! *MEM are returned, using TARGET if possible. No memory model is required ! since a compare_and_swap loop is seq-cst. */ ! ! static rtx ! maybe_emit_compare_and_swap_exchange_loop (rtx target, rtx mem, rtx val) ! { ! enum machine_mode mode = GET_MODE (mem); ! if (can_compare_and_swap_p (mode, true)) { if (!target || !register_operand (target, mode)) *************** expand_atomic_exchange (rtx target, rtx *** 7427,7432 **** --- 7435,7539 ---- return NULL_RTX; } + #ifndef HAVE_atomic_test_and_set + #define HAVE_atomic_test_and_set 0 + #define gen_atomic_test_and_set(x,y,z) (gcc_unreachable (), NULL_RTX) + #endif + + /* This function expands the legacy _sync_lock test_and_set operation which is + generally an atomic exchange. Some limited targets only allow the + constant 1 to be stored. This is an ACQUIRE operation. + + TARGET is an optional place to stick the return value. + MEM is where VAL is stored. */ + + rtx + expand_sync_lock_test_and_set (rtx target, rtx mem, rtx val) + { + rtx ret; + + /* Try an atomic_exchange first. */ + ret = maybe_emit_atomic_exchange (target, mem, val, MEMMODEL_ACQUIRE); + + if (!ret) + ret = maybe_emit_sync_lock_test_and_set (target, mem, val, + MEMMODEL_ACQUIRE); + if (!ret) + ret = maybe_emit_compare_and_swap_exchange_loop (target, mem, val); + + /* If there are no other options, try atomic_test_and_set if the value + being stored is 1. 
*/ + if (!ret && val == const1_rtx && HAVE_atomic_test_and_set) + { + ret = gen_atomic_test_and_set (target, mem, GEN_INT (MEMMODEL_ACQUIRE)); + emit_insn (ret); + } + + return ret; + } + + /* This function expands the atomic test_and_set operation: + atomically store a boolean TRUE into MEM and return the previous value. + + MEMMODEL is the memory model variant to use. + TARGET is an optional place to stick the return value. */ + + rtx + expand_atomic_test_and_set (rtx target, rtx mem, enum memmodel model) + { + enum machine_mode mode = GET_MODE (mem); + rtx ret = NULL_RTX; + + if (target == NULL_RTX) + target = gen_reg_rtx (mode); + + if (HAVE_atomic_test_and_set) + { + ret = gen_atomic_test_and_set (target, mem, GEN_INT (MEMMODEL_ACQUIRE)); + emit_insn (ret); + return ret; + } + + /* If there is no test and set, try exchange, then a compare_and_swap loop, + then __sync_test_and_set. */ + ret = maybe_emit_atomic_exchange (target, mem, const1_rtx, model); + + if (!ret) + ret = maybe_emit_compare_and_swap_exchange_loop (target, mem, const1_rtx); + + if (!ret) + ret = maybe_emit_sync_lock_test_and_set (target, mem, const1_rtx, model); + + if (ret) + return ret; + + /* Failing all else, assume a single threaded environment and simply perform + the operation. */ + emit_move_insn (target, mem); + emit_move_insn (mem, const1_rtx); + return target; + } + + /* This function expands the atomic exchange operation: + atomically store VAL in MEM and return the previous value in MEM. + + MEMMODEL is the memory model variant to use. + TARGET is an optional place to stick the return value. */ + + rtx + expand_atomic_exchange (rtx target, rtx mem, rtx val, enum memmodel model) + { + rtx ret; + + ret = maybe_emit_atomic_exchange (target, mem, val, model); + + /* Next try a compare-and-swap loop for the exchange. 
*/ + if (!ret) + ret = maybe_emit_compare_and_swap_exchange_loop (target, mem, val); + + return ret; + } + /* This function expands the atomic compare exchange operation: *PTARGET_BOOL is an optional place to store the boolean success/failure. *************** expand_atomic_store (rtx mem, rtx val, e *** 7726,7732 **** the result. If that doesn't work, don't do anything. */ if (GET_MODE_PRECISION(mode) > BITS_PER_WORD) { ! rtx target = expand_atomic_exchange (NULL_RTX, mem, val, model, false); if (target) return const0_rtx; else --- 7833,7841 ---- the result. If that doesn't work, don't do anything. */ if (GET_MODE_PRECISION(mode) > BITS_PER_WORD) { ! rtx target = maybe_emit_atomic_exchange (NULL_RTX, mem, val, model); ! if (!target) ! target = maybe_emit_compare_and_swap_exchange_loop (NULL_RTX, mem, val); if (target) return const0_rtx; else Index: builtins.c =================================================================== *** builtins.c (revision 181614) --- builtins.c (working copy) *************** expand_builtin_sync_lock_test_and_set (e *** 5227,5233 **** mem = get_builtin_sync_mem (CALL_EXPR_ARG (exp, 0), mode); val = expand_expr_force_mode (CALL_EXPR_ARG (exp, 1), mode); ! return expand_atomic_exchange (target, mem, val, MEMMODEL_ACQUIRE, true); } /* Expand the __sync_lock_release intrinsic. EXP is the CALL_EXPR. */ --- 5227,5233 ---- mem = get_builtin_sync_mem (CALL_EXPR_ARG (exp, 0), mode); val = expand_expr_force_mode (CALL_EXPR_ARG (exp, 1), mode); ! return expand_sync_lock_test_and_set (target, mem, val); } /* Expand the __sync_lock_release intrinsic. EXP is the CALL_EXPR. */ *************** expand_builtin_atomic_exchange (enum mac *** 5291,5297 **** mem = get_builtin_sync_mem (CALL_EXPR_ARG (exp, 0), mode); val = expand_expr_force_mode (CALL_EXPR_ARG (exp, 1), mode); ! 
return expand_atomic_exchange (target, mem, val, model, false); } /* Expand the __atomic_compare_exchange intrinsic: --- 5291,5297 ---- mem = get_builtin_sync_mem (CALL_EXPR_ARG (exp, 0), mode); val = expand_expr_force_mode (CALL_EXPR_ARG (exp, 1), mode); ! return expand_atomic_exchange (target, mem, val, model); } /* Expand the __atomic_compare_exchange intrinsic: *************** expand_builtin_atomic_fetch_op (enum mac *** 5482,5487 **** --- 5482,5492 ---- } + #ifndef HAVE_atomic_clear + # define HAVE_atomic_clear 0 + # define gen_atomic_clear(x,y) (gcc_unreachable (), NULL_RTX) + #endif + /* Expand an atomic clear operation. void _atomic_clear (BOOL *obj, enum memmodel) EXP is the call expression. */ *************** expand_builtin_atomic_clear (tree exp) *** 5503,5508 **** --- 5508,5519 ---- return const0_rtx; } + if (HAVE_atomic_clear) + { + emit_insn (gen_atomic_clear (mem, model)); + return const0_rtx; + } + /* Try issuing an __atomic_store, and allow fallback to __sync_lock_release. Failing that, a store is issued by __atomic_store. The only way this can fail is if the bool type is larger than a word size. Unlikely, but *************** expand_builtin_atomic_clear (tree exp) *** 5519,5527 **** EXP is the call expression. */ static rtx ! expand_builtin_atomic_test_and_set (tree exp) { ! rtx mem, ret; enum memmodel model; enum machine_mode mode; --- 5530,5538 ---- EXP is the call expression. */ static rtx ! expand_builtin_atomic_test_and_set (tree exp, rtx target) { ! rtx mem; enum memmodel model; enum machine_mode mode; *************** expand_builtin_atomic_test_and_set (tree *** 5529,5548 **** mem = get_builtin_sync_mem (CALL_EXPR_ARG (exp, 0), mode); model = get_memmodel (CALL_EXPR_ARG (exp, 1)); ! /* Try issuing an exchange. If it is lock free, or if there is a limited ! functionality __sync_lock_test_and_set, this will utilize it. */ ! ret = expand_atomic_exchange (NULL_RTX, mem, const1_rtx, model, true); ! if (ret) ! return ret; ! ! 
/* Otherwise, there is no lock free support for test and set. Simply ! perform a load and a store. Since this presumes a non-atomic architecture, ! also assume single threadedness and don't issue barriers either. */ ! ! ret = gen_reg_rtx (mode); ! emit_move_insn (ret, mem); ! emit_move_insn (mem, const1_rtx); ! return ret; } --- 5540,5546 ---- mem = get_builtin_sync_mem (CALL_EXPR_ARG (exp, 0), mode); model = get_memmodel (CALL_EXPR_ARG (exp, 1)); ! return expand_atomic_test_and_set (target, mem, model); } *************** expand_builtin (tree exp, rtx target, rt *** 6711,6717 **** break; case BUILT_IN_ATOMIC_TEST_AND_SET: ! return expand_builtin_atomic_test_and_set (exp); case BUILT_IN_ATOMIC_CLEAR: return expand_builtin_atomic_clear (exp); --- 6709,6715 ---- break; case BUILT_IN_ATOMIC_TEST_AND_SET: ! return expand_builtin_atomic_test_and_set (exp, target); case BUILT_IN_ATOMIC_CLEAR: return expand_builtin_atomic_clear (exp); Index: expr.h =================================================================== *** expr.h (revision 181614) --- expr.h (working copy) *************** rtx emit_conditional_add (rtx, enum rtx_ *** 214,225 **** rtx expand_sync_operation (rtx, rtx, enum rtx_code); rtx expand_sync_fetch_operation (rtx, rtx, enum rtx_code, bool, rtx); ! rtx expand_atomic_exchange (rtx, rtx, rtx, enum memmodel, bool); rtx expand_atomic_load (rtx, rtx, enum memmodel); rtx expand_atomic_store (rtx, rtx, enum memmodel, bool); rtx expand_atomic_fetch_op (rtx, rtx, rtx, enum rtx_code, enum memmodel, bool); void expand_atomic_thread_fence (enum memmodel); void expand_atomic_signal_fence (enum memmodel); --- 214,228 ---- rtx expand_sync_operation (rtx, rtx, enum rtx_code); rtx expand_sync_fetch_operation (rtx, rtx, enum rtx_code, bool, rtx); + rtx expand_sync_lock_test_and_set (rtx, rtx, rtx); ! 
rtx expand_atomic_exchange (rtx, rtx, rtx, enum memmodel); rtx expand_atomic_load (rtx, rtx, enum memmodel); rtx expand_atomic_store (rtx, rtx, enum memmodel, bool); rtx expand_atomic_fetch_op (rtx, rtx, rtx, enum rtx_code, enum memmodel, bool); + rtx expand_atomic_test_and_set (rtx, rtx, enum memmodel); + rtx expand_atomic_clear (rtx, enum memmodel); void expand_atomic_thread_fence (enum memmodel); void expand_atomic_signal_fence (enum memmodel);