Here's the patch for __sync_mem_load, complete with tests.
I'll change and correct the actual implementation of the load pattern later (I think I have the x86 fence wrong). It occurs to me that if I implement the __sync_mem_thread_fence (model) routine, then the appropriate fences could be generated by using that __sync builtin instead of __sync_synchronize all the time as well.
I'm trying to gather a set of optimal, or at least decent, sequences for each pattern. I'll go back and make changes once I have collected some and finished all the other atomic operations. It'll be easier then.
Andrew
* doc/extend.texi (__sync_mem_load): Document. * c-family/c-common.c (resolve_overloaded_builtin): Add BUILT_IN_SYNC_MEM_LOAD_N. * optabs.c (expand_sync_mem_load): New. * optabs.h (enum direct_optab_index): Add DOI_sync_mem_load. (sync_mem_load_optab): Define. * genopinit.c: Add entry for sync_mem_load. * builtins.c (expand_builtin_sync_mem_load): New. (expand_builtin): Handle BUILT_IN_SYNC_MEM_LOAD_* * sync-bultins.def: Add entries for BUILT_IN_SYNC_MEM_LOAD_*. * testsuite/gcc.dg/sync-mem-invalid.c: Add invalid load tests. * testsuite/gcc.dg/sync-mem.h: Add load executable tests. * builtin-types.def (BT_FN_I{1,2,4,8,16}_VPTR_INT): New. * expr.h (expand_sync_mem_load): Declare. * fortran/types.def (BT_FN_I{1,2,4,8,16}_VPTR_INT): New. * config/i386/sync.md (sync_mem_load<mode>): New pattern. Index: doc/extend.texi =================================================================== *** doc/extend.texi (revision 175331) --- doc/extend.texi (working copy) *************** This means that all previous memory stor *** 6729,6734 **** --- 6729,6747 ---- previous memory loads have been satisfied, but following memory reads are not prevented from being speculated to before the barrier. + @item @var{type} __sync_mem_load (@var{type} *ptr, int memmodel, ...) + @findex __sync_mem_load + This builtin implements an atomic load operation within the constraints of a + memory model. It returns the contents of @code{*@var{ptr}}. + + The valid memory model variants for this builtin are + __SYNC_MEM_RELAXED, __SYNC_MEM_SEQ_CST, __SYNC_MEM_ACQUIRE, and + __SYNC_MEM_CONSUME. The target pattern is responsible + for issuing the different synchronization instructions. It should default to + the more restrictive memory model, the sequentially consistent model. If + nothing is implemented for the target, the compiler will implement it by + issuing a memory barrier, the load, and then another memory barrier. 
+ @item @var{type} __sync_mem_exchange (@var{type} *ptr, @var{type} value, int memmodel, ...) @findex __sync_mem_exchange This builtin implements an atomic exchange operation within the Index: c-family/c-common.c =================================================================== *** c-family/c-common.c (revision 175331) --- c-family/c-common.c (working copy) *************** resolve_overloaded_builtin (location_t l *** 9061,9066 **** --- 9061,9067 ---- case BUILT_IN_SYNC_LOCK_TEST_AND_SET_N: case BUILT_IN_SYNC_LOCK_RELEASE_N: case BUILT_IN_SYNC_MEM_EXCHANGE_N: + case BUILT_IN_SYNC_MEM_LOAD_N: { int n = sync_resolve_size (function, params); tree new_function, first_param, result; Index: optabs.c =================================================================== *** optabs.c (revision 175331) --- optabs.c (working copy) *************** expand_sync_lock_test_and_set (rtx mem, *** 7057,7062 **** --- 7057,7104 ---- return NULL_RTX; } + /* This function expands the atomic load operation: + return the atomically loaded value in MEM. + + MEMMODEL is the memory model variant to use. + TARGET is an option place to stick the return value. */ + + rtx + expand_sync_mem_load (enum memmodel model, rtx mem, rtx target) + { + enum machine_mode mode = GET_MODE (mem); + enum insn_code icode; + + /* If the target supports the load directly, great. */ + icode = direct_optab_handler (sync_mem_load_optab, mode); + if (icode != CODE_FOR_nothing) + { + struct expand_operand ops[3]; + + create_output_operand (&ops[0], target, mode); + create_fixed_operand (&ops[1], mem); + create_integer_operand (&ops[2], model); + if (maybe_expand_insn (icode, 3, ops)) + return ops[0].value; + } + + /* For any model other than RELAXED, perform a synchronization first. */ + if (model != MEMMODEL_RELAXED) + expand_builtin_sync_synchronize (); + + /* If the result is unused, don't bother loading. 
*/ + if (target != const0_rtx) + emit_move_insn (target, mem); + else + target = NULL_RTX; + + /* For SEQ_CST, also emit a barrier after the load. */ + if (model == MEMMODEL_SEQ_CST) + expand_builtin_sync_synchronize (); + + return target; + } + /* This function expands the atomic exchange operation: atomically store VAL in MEM and return the previous value in MEM. Index: optabs.h =================================================================== *** optabs.h (revision 175331) --- optabs.h (working copy) *************** enum direct_optab_index *** 679,684 **** --- 679,685 ---- /* Atomic operations with C++0x memory model parameters. */ DOI_sync_mem_exchange, + DOI_sync_mem_load, DOI_MAX }; *************** typedef struct direct_optab_d *direct_op *** 730,735 **** --- 731,738 ---- #define sync_mem_exchange_optab \ (&direct_optab_table[(int) DOI_sync_mem_exchange]) + #define sync_mem_load_optab \ + (&direct_optab_table[(int) DOI_sync_mem_load]) /* Target-dependent globals. */ struct target_optabs { Index: genopinit.c =================================================================== *** genopinit.c (revision 175331) --- genopinit.c (working copy) *************** static const char * const optabs[] = *** 242,247 **** --- 242,248 ---- "set_direct_optab_handler (sync_lock_test_and_set_optab, $A, CODE_FOR_$(sync_lock_test_and_set$I$a$))", "set_direct_optab_handler (sync_lock_release_optab, $A, CODE_FOR_$(sync_lock_release$I$a$))", "set_direct_optab_handler (sync_mem_exchange_optab, $A, CODE_FOR_$(sync_mem_exchange$I$a$))", + "set_direct_optab_handler (sync_mem_load_optab, $A, CODE_FOR_$(sync_mem_load$I$a$))", "set_optab_handler (vec_set_optab, $A, CODE_FOR_$(vec_set$a$))", "set_optab_handler (vec_extract_optab, $A, CODE_FOR_$(vec_extract$a$))", "set_optab_handler (vec_extract_even_optab, $A, CODE_FOR_$(vec_extract_even$a$))", Index: builtins.c =================================================================== *** builtins.c (revision 175331) --- builtins.c (working 
copy) *************** get_memmodel (tree exp) *** 5221,5226 **** --- 5221,5255 ---- return (enum memmodel) INTVAL (op); } + /* Expand the __sync_mem_load intrinsic: + + TYPE __sync_mem_load (TYPE *to, TYPE from, enum memmodel) + + EXP is the CALL_EXPR. + TARGET is an optional place for us to store the results. */ + + static rtx + expand_builtin_sync_mem_load (enum machine_mode mode, tree exp, rtx target) + { + rtx mem; + enum memmodel model; + + model = get_memmodel (CALL_EXPR_ARG (exp, 1)); + if (model != MEMMODEL_RELAXED + && model != MEMMODEL_SEQ_CST + && model != MEMMODEL_CONSUME + && model != MEMMODEL_ACQUIRE) + { + error ("invalid memory model for %<__sync_mem_load%>"); + return NULL_RTX; + } + + /* Expand the operand. */ + mem = get_builtin_sync_mem (CALL_EXPR_ARG (exp, 0), mode); + + return expand_sync_mem_load (model, mem, target); + } + /* Expand the __sync_mem_exchange intrinsic: TYPE __sync_mem_exchange (TYPE *to, TYPE from, enum memmodel) *************** expand_builtin (tree exp, rtx target, rt *** 6077,6082 **** --- 6106,6123 ---- return target; break; + case BUILT_IN_SYNC_MEM_LOAD_1: + case BUILT_IN_SYNC_MEM_LOAD_2: + case BUILT_IN_SYNC_MEM_LOAD_4: + case BUILT_IN_SYNC_MEM_LOAD_8: + case BUILT_IN_SYNC_MEM_LOAD_16: + mode = get_builtin_sync_mode (fcode - BUILT_IN_SYNC_MEM_LOAD_1); + target = expand_builtin_sync_mem_load (mode, exp, target); + if (target) + return target; + break; + + case BUILT_IN_SYNC_MEM_EXCHANGE_1: case BUILT_IN_SYNC_MEM_EXCHANGE_2: case BUILT_IN_SYNC_MEM_EXCHANGE_4: Index: sync-builtins.def =================================================================== *** sync-builtins.def (revision 175331) --- sync-builtins.def (working copy) *************** DEF_SYNC_BUILTIN (BUILT_IN_SYNC_SYNCHRON *** 259,264 **** --- 259,283 ---- /* __sync* builtins for the C++ memory model. 
*/ + DEF_SYNC_BUILTIN (BUILT_IN_SYNC_MEM_LOAD_N, + "__sync_mem_load", + BT_FN_VOID_VAR, ATTR_NOTHROW_LEAF_LIST) + DEF_SYNC_BUILTIN (BUILT_IN_SYNC_MEM_LOAD_1, + "__sync_mem_load_1", + BT_FN_I1_VPTR_INT, ATTR_NOTHROW_LEAF_LIST) + DEF_SYNC_BUILTIN (BUILT_IN_SYNC_MEM_LOAD_2, + "__sync_mem_load_2", + BT_FN_I2_VPTR_INT, ATTR_NOTHROW_LEAF_LIST) + DEF_SYNC_BUILTIN (BUILT_IN_SYNC_MEM_LOAD_4, + "__sync_mem_load_4", + BT_FN_I4_VPTR_INT, ATTR_NOTHROW_LEAF_LIST) + DEF_SYNC_BUILTIN (BUILT_IN_SYNC_MEM_LOAD_8, + "__sync_mem_load_8", + BT_FN_I8_VPTR_INT, ATTR_NOTHROW_LEAF_LIST) + DEF_SYNC_BUILTIN (BUILT_IN_SYNC_MEM_LOAD_16, + "__sync_mem_load_16", + BT_FN_I16_VPTR_INT, ATTR_NOTHROW_LEAF_LIST) + DEF_SYNC_BUILTIN (BUILT_IN_SYNC_MEM_EXCHANGE_N, "__sync_mem_exchange", BT_FN_VOID_VAR, ATTR_NOTHROW_LEAF_LIST) Index: testsuite/gcc.dg/sync-mem-invalid.c =================================================================== *** testsuite/gcc.dg/sync-mem-invalid.c (revision 175331) --- testsuite/gcc.dg/sync-mem-invalid.c (working copy) *************** int i; *** 8,11 **** --- 8,13 ---- main () { __sync_mem_exchange (&i, 1, __SYNC_MEM_CONSUME); /* { dg-error "invalid memory model" } */ + __sync_mem_load (&i, __SYNC_MEM_RELEASE); /* { dg-error "invalid memory model" } */ + __sync_mem_load (&i, __SYNC_MEM_ACQ_REL); /* { dg-error "invalid memory model" } */ } Index: testsuite/gcc.dg/sync-mem.h =================================================================== *** testsuite/gcc.dg/sync-mem.h (revision 175331) --- testsuite/gcc.dg/sync-mem.h (working copy) *************** void test_exchange() *** 18,27 **** EXCHANGE (v, __SYNC_MEM_SEQ_CST); } main () { ! 
test_exchange(); return 0; } --- 18,39 ---- EXCHANGE (v, __SYNC_MEM_SEQ_CST); } + #define LOAD(VAR, MODE) if (__sync_mem_load (&VAR, MODE) != count++) abort(); else VAR++ + + void test_load() + { + v = 0; + count = 0; + LOAD (v, __SYNC_MEM_RELAXED); + LOAD (v, __SYNC_MEM_ACQUIRE); + LOAD (v, __SYNC_MEM_CONSUME); + LOAD (v, __SYNC_MEM_SEQ_CST); + } main () { ! test_exchange (); ! test_load (); return 0; } Index: builtin-types.def =================================================================== *** builtin-types.def (revision 175331) --- builtin-types.def (working copy) *************** DEF_FUNCTION_TYPE_2 (BT_FN_BOOL_LONGPTR_ *** 315,320 **** --- 315,325 ---- BT_BOOL, BT_PTR_LONG, BT_PTR_LONG) DEF_FUNCTION_TYPE_2 (BT_FN_BOOL_ULONGLONGPTR_ULONGLONGPTR, BT_BOOL, BT_PTR_ULONGLONG, BT_PTR_ULONGLONG) + DEF_FUNCTION_TYPE_2 (BT_FN_I1_VPTR_INT, BT_I1, BT_VOLATILE_PTR, BT_INT) + DEF_FUNCTION_TYPE_2 (BT_FN_I2_VPTR_INT, BT_I2, BT_VOLATILE_PTR, BT_INT) + DEF_FUNCTION_TYPE_2 (BT_FN_I4_VPTR_INT, BT_I4, BT_VOLATILE_PTR, BT_INT) + DEF_FUNCTION_TYPE_2 (BT_FN_I8_VPTR_INT, BT_I8, BT_VOLATILE_PTR, BT_INT) + DEF_FUNCTION_TYPE_2 (BT_FN_I16_VPTR_INT, BT_I16, BT_VOLATILE_PTR, BT_INT) DEF_POINTER_TYPE (BT_PTR_FN_VOID_PTR_PTR, BT_FN_VOID_PTR_PTR) Index: expr.h =================================================================== *** expr.h (revision 175331) --- expr.h (working copy) *************** rtx expand_bool_compare_and_swap (rtx, r *** 217,222 **** --- 217,223 ---- rtx expand_sync_operation (rtx, rtx, enum rtx_code); rtx expand_sync_fetch_operation (rtx, rtx, enum rtx_code, bool, rtx); rtx expand_sync_lock_test_and_set (rtx, rtx, rtx); + rtx expand_sync_mem_load (enum memmodel, rtx, rtx); rtx expand_sync_mem_exchange (enum memmodel, rtx, rtx, rtx); /* Functions from expmed.c: */ Index: fortran/types.def =================================================================== *** fortran/types.def (revision 175331) --- fortran/types.def (working copy) *************** DEF_FUNCTION_TYPE_2 
(BT_FN_I4_VPTR_I4, B *** 98,103 **** --- 98,108 ---- DEF_FUNCTION_TYPE_2 (BT_FN_I8_VPTR_I8, BT_I8, BT_VOLATILE_PTR, BT_I8) DEF_FUNCTION_TYPE_2 (BT_FN_I16_VPTR_I16, BT_I16, BT_VOLATILE_PTR, BT_I16) DEF_FUNCTION_TYPE_2 (BT_FN_VOID_PTR_PTR, BT_VOID, BT_PTR, BT_PTR) + DEF_FUNCTION_TYPE_2 (BT_FN_I1_VPTR_INT, BT_I1, BT_VOLATILE_PTR, BT_INT) + DEF_FUNCTION_TYPE_2 (BT_FN_I2_VPTR_INT, BT_I2, BT_VOLATILE_PTR, BT_INT) + DEF_FUNCTION_TYPE_2 (BT_FN_I4_VPTR_INT, BT_I4, BT_VOLATILE_PTR, BT_INT) + DEF_FUNCTION_TYPE_2 (BT_FN_I8_VPTR_INT, BT_I8, BT_VOLATILE_PTR, BT_INT) + DEF_FUNCTION_TYPE_2 (BT_FN_I16_VPTR_INT, BT_I16, BT_VOLATILE_PTR, BT_INT) DEF_POINTER_TYPE (BT_PTR_FN_VOID_PTR_PTR, BT_FN_VOID_PTR_PTR) Index: config/i386/sync.md =================================================================== *** config/i386/sync.md (revision 175331) --- config/i386/sync.md (working copy) *************** *** 232,237 **** --- 232,252 ---- return "lock{%;} add{<imodesuffix>}\t{%1, %0|%0, %1}"; }) + + (define_expand "sync_mem_load<mode>" + [(match_operand:SWI 0 "register_operand" "") ;; output + (match_operand:SWI 1 "memory_operand" "") ;; memory + (match_operand:SI 2 "const_int_operand" "")] ;; memory model + "" + { + if (INTVAL (operands[2]) == MEMMODEL_ACQUIRE || + INTVAL (operands[2]) == MEMMODEL_SEQ_CST) + expand_builtin_sync_synchronize (); + ix86_expand_move (<MODE>mode, operands); + DONE; + }) + + (define_expand "sync_mem_exchange<mode>" [(match_operand:SWI 0 "register_operand" "") ;; output (match_operand:SWI 1 "memory_operand" "") ;; memory