Hi Tom! For my understanding:
On 2022-02-10T10:13:10+0100, Tom de Vries via Gcc-patches <gcc-patches@gcc.gnu.org> wrote: > The ptx isa specifies (for pre-sm_7x) that atomic operations on shared memory > locations do not guarantee atomicity with respect to normal store instructions > to the same address. > > This can be fixed by: > - inserting barriers between normal stores and atomic operations to a common > address > - using atom.exch to store to locations accessed by other atomic operations. > > It's not clearly spelled out which barriers are needed, and a barrier seem > more > expensive than atomic exchange. > > Implement the pre-sm_7x shared atomic store using atomic exchange. > > That includes stores using generic addressing, since those may also point to > shared memory. It is expected that this changes, for example (similar elsewhere) 'nvptx-none/libatomic/store_4_.o', to use (a) 'atom.exch' (with a new dummy register allocated) and yet (b) 'membar.sys' remains before as well as after (a): .visible .func __atomic_store_4 (.param .u64 %in_ar0, .param .u32 %in_ar1, .param .u32 %in_ar2) { .reg .u64 %ar0; ld.param.u64 %ar0,[%in_ar0]; .reg .u32 %ar1; ld.param.u32 %ar1,[%in_ar1]; .reg .u32 %ar2; ld.param.u32 %ar2,[%in_ar2]; .reg .u64 %r22; .reg .u32 %r23; +.reg .u32 %r25; mov.u64 %r22,%ar0; mov.u32 %r23,%ar1; .loc 2 39 5 membar.sys; -st.u32 [%r22],%r23; +atom.exch.b32 %r25,[%r22],%r23; membar.sys; .loc 2 40 1 ret; } And, for example (similar elsewhere) 'nvptx-none/libgomp/single.o', a 'ld' that previously was issued after 'membar.sys' now moves before that one: .visible .func (.param .u64 %value_out) GOMP_single_copy_start { [...] .reg .u64 %r33; .reg .u64 %r34; [...] .reg .pred %r51; .reg .u64 %r50; .reg .u64 %r52; [...] ld.shared.u64 %r50,[nvptx_thrs]; add.u64 %r33,%r50,%r49; .loc 2 1317 32 ld.u64 %r34,[%r33+32]; .loc 2 1317 6 setp.eq.u64 %r51,%r34,0; @ %r51 bra $L6; .loc 4 66 3 -membar.sys; ld.u64 %r52,[%r33+24]; -st.u64 [%r34+80],%r52; +membar.sys; +atom.exch.b64 %r53,[%r34+80],%r52; $L6: [...] (But I see there is another 'ld.u64 %r34,[%r33+32]' that previously also already was issued before the 'membar.sys'.) Grüße Thomas > [nvptx] Handle pre-sm_7x shared atomic store using atomic exchange > > gcc/ChangeLog: > > 2022-02-02 Tom de Vries <tdevr...@suse.de> > > * config/nvptx/nvptx-protos.h (nvptx_mem_maybe_shared_p): Declare. > * config/nvptx/nvptx.cc (nvptx_mem_data_area): New static function. > (nvptx_mem_maybe_shared_p): New function. > * config/nvptx/nvptx.md (define_expand "atomic_store<mode>"): New > define_expand. > > gcc/testsuite/ChangeLog: > > 2022-02-02 Tom de Vries <tdevr...@suse.de> > > * gcc.target/nvptx/atomic-store-1.c: New test. > * gcc.target/nvptx/atomic-store-3.c: New test. > * gcc.target/nvptx/stack-atomics-run.c: Update. > > --- > gcc/config/nvptx/nvptx-protos.h | 1 + > gcc/config/nvptx/nvptx.cc | 22 ++++++++++++++++ > gcc/config/nvptx/nvptx.md | 30 > ++++++++++++++++++++++ > gcc/testsuite/gcc.target/nvptx/atomic-store-1.c | 26 +++++++++++++++++++ > gcc/testsuite/gcc.target/nvptx/atomic-store-3.c | 25 ++++++++++++++++++ > gcc/testsuite/gcc.target/nvptx/stack-atomics-run.c | 6 ++++- > 6 files changed, 109 insertions(+), 1 deletion(-) > > diff --git a/gcc/config/nvptx/nvptx-protos.h b/gcc/config/nvptx/nvptx-protos.h > index a846e341917..0bf9af406a2 100644 > --- a/gcc/config/nvptx/nvptx-protos.h > +++ b/gcc/config/nvptx/nvptx-protos.h > @@ -60,5 +60,6 @@ extern const char *nvptx_output_simt_exit (rtx); > extern const char *nvptx_output_red_partition (rtx, rtx); > extern const char *nvptx_output_atomic_insn (const char *, rtx *, int, int); > extern bool nvptx_mem_local_p (rtx); > +extern bool nvptx_mem_maybe_shared_p (const_rtx); > #endif > #endif > diff --git a/gcc/config/nvptx/nvptx.cc b/gcc/config/nvptx/nvptx.cc > index 1b0227a2c31..5b26c0f4c7d 100644 > --- a/gcc/config/nvptx/nvptx.cc > +++ b/gcc/config/nvptx/nvptx.cc > @@ -76,6 +76,7 @@ > #include "intl.h" > #include "opts.h" > #include "tree-pretty-print.h" > +#include "rtl-iter.h" > > /* This file should be included last. */ > #include "target-def.h" > @@ -2787,6 +2788,27 @@ nvptx_print_operand_address (FILE *file, machine_mode > mode, rtx addr) > nvptx_print_address_operand (file, addr, mode); > } > > +static nvptx_data_area > +nvptx_mem_data_area (const_rtx x) > +{ > + gcc_assert (GET_CODE (x) == MEM); > + > + const_rtx addr = XEXP (x, 0); > + subrtx_iterator::array_type array; > + FOR_EACH_SUBRTX (iter, array, addr, ALL) > + if (SYMBOL_REF_P (*iter)) > + return SYMBOL_DATA_AREA (*iter); > + > + return DATA_AREA_GENERIC; > +} > + > +bool > +nvptx_mem_maybe_shared_p (const_rtx x) > +{ > + nvptx_data_area area = nvptx_mem_data_area (x); > + return area == DATA_AREA_SHARED || area == DATA_AREA_GENERIC; > +} > + > /* Print an operand, X, to FILE, with an optional modifier in CODE. > > Meaning of CODE: > diff --git a/gcc/config/nvptx/nvptx.md b/gcc/config/nvptx/nvptx.md > index cced68e0d4a..1a283b41922 100644 > --- a/gcc/config/nvptx/nvptx.md > +++ b/gcc/config/nvptx/nvptx.md > @@ -2051,6 +2051,36 @@ (define_insn "atomic_exchange<mode>" > } > [(set_attr "atomic" "true")]) > > +(define_expand "atomic_store<mode>" > + [(match_operand:SDIM 0 "memory_operand" "=m") ;; memory > + (match_operand:SDIM 1 "nvptx_nonmemory_operand" "Ri") ;; input > + (match_operand:SI 2 "const_int_operand")] ;; model > + "" > +{ > + struct address_info info; > + decompose_mem_address (&info, operands[0]); > + if (info.base != NULL && REG_P (*info.base) > + && REGNO_PTR_FRAME_P (REGNO (*info.base))) > + { > + emit_insn (gen_mov<mode> (operands[0], operands[1])); > + DONE; > + } > + > + if (TARGET_SM70) > + /* Fall back to expand_atomic_store. */ > + FAIL; > + > + bool maybe_shared_p = nvptx_mem_maybe_shared_p (operands[0]); > + if (!maybe_shared_p) > + /* Fall back to expand_atomic_store. */ > + FAIL; > + > + rtx tmpreg = gen_reg_rtx (<MODE>mode); > + emit_insn (gen_atomic_exchange<mode> (tmpreg, operands[0], operands[1], > + operands[2])); > + DONE; > +}) > + > (define_insn "atomic_fetch_add<mode>" > [(set (match_operand:SDIM 1 "memory_operand" "+m") > (unspec_volatile:SDIM > diff --git a/gcc/testsuite/gcc.target/nvptx/atomic-store-1.c > b/gcc/testsuite/gcc.target/nvptx/atomic-store-1.c > new file mode 100644 > index 00000000000..cee3815eda5 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/nvptx/atomic-store-1.c > @@ -0,0 +1,26 @@ > +/* Test the atomic store expansion for sm <= sm_6x targets, > + shared state space. */ > + > +/* { dg-do compile } */ > +/* { dg-options "-misa=sm_53" } */ > + > +enum memmodel > +{ > + MEMMODEL_SEQ_CST = 5 > +}; > + > +unsigned int u32 __attribute__((shared)); > +unsigned long long int u64 __attribute__((shared)); > + > +int > +main() > +{ > + __atomic_store_n (&u32, 0, MEMMODEL_SEQ_CST); > + __atomic_store_n (&u64, 0, MEMMODEL_SEQ_CST); > + > + return 0; > +} > + > +/* { dg-final { scan-assembler-times "atom.shared.exch.b32" 1 } } */ > +/* { dg-final { scan-assembler-times "atom.shared.exch.b64" 1 } } */ > +/* { dg-final { scan-assembler-times "membar.cta" 4 } } */ > diff --git a/gcc/testsuite/gcc.target/nvptx/atomic-store-3.c > b/gcc/testsuite/gcc.target/nvptx/atomic-store-3.c > new file mode 100644 > index 00000000000..cc0264f2b06 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/nvptx/atomic-store-3.c > @@ -0,0 +1,25 @@ > +/* Test the atomic store expansion, global state space. */ > + > +/* { dg-do compile } */ > +/* { dg-additional-options "-Wno-long-long" } */ > + > +enum memmodel > +{ > + MEMMODEL_SEQ_CST = 5 > +}; > + > +unsigned int u32; > +unsigned long long int u64; > + > +int > +main() > +{ > + __atomic_store_n (&u32, 0, MEMMODEL_SEQ_CST); > + __atomic_store_n (&u64, 0, MEMMODEL_SEQ_CST); > + > + return 0; > +} > + > +/* { dg-final { scan-assembler-times "st.global.u32" 1 } } */ > +/* { dg-final { scan-assembler-times "st.global.u64" 1 } } */ > +/* { dg-final { scan-assembler-times "membar.sys" 4 } } */ > diff --git a/gcc/testsuite/gcc.target/nvptx/stack-atomics-run.c > b/gcc/testsuite/gcc.target/nvptx/stack-atomics-run.c > index ad8e2f842fb..cd045964dfe 100644 > --- a/gcc/testsuite/gcc.target/nvptx/stack-atomics-run.c > +++ b/gcc/testsuite/gcc.target/nvptx/stack-atomics-run.c > @@ -39,6 +39,10 @@ main (void) > if (b != 1) > __builtin_abort (); > > - > + a = 1; > + __atomic_store_n (&a, 0, MEMMODEL_RELAXED); > + if (a != 0) > + __builtin_abort (); > + > return 0; > } ----------------- Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht München, HRB 106955