Hi,

In PR target/104364, two problems were reported:
- in -muniform-simt mode, an atom.cas insn is no longer executed in the "master lane" only, and
- in -msoft-stack mode, an __atomic_compare_exchange_n on stack memory is translated on the assumption that it accesses local memory, which is not the case.

Fix these by:
- ensuring that all insns with the atomic attribute are also predicable, so that the validate_change in nvptx_reorg_uniform_simt succeeds, and asserting that it does, and
- guarding the local atomics implementation with a new function nvptx_mem_local_p that correctly handles -msoft-stack.

Tested on x86_64 with nvptx accelerator.

Committed to trunk.

Thanks,
- Tom

[nvptx] Fix .local atomic regressions

gcc/ChangeLog:

2022-02-04  Tom de Vries  <tdevr...@suse.de>

	PR target/104364
	* config/nvptx/nvptx-protos.h (nvptx_mem_local_p): Declare.
	* config/nvptx/nvptx.cc (nvptx_reorg_uniform_simt): Assert that
	change is validated.
	(nvptx_mem_local_p): New function.
	* config/nvptx/nvptx.md: Use nvptx_mem_local_p.
	(define_c_enum "unspecv"): Add UNSPECV_CAS_LOCAL.
	(define_insn "atomic_compare_and_swap<mode>_1_local"): New
	non-atomic, non-predicable define_insn, factored out of ...
	(define_insn "atomic_compare_and_swap<mode>_1"): ... here.  Make
	predicable again.
	(define_expand "atomic_compare_and_swap<mode>"): Use
	atomic_compare_and_swap<mode>_1_local.

gcc/testsuite/ChangeLog:

2022-02-04  Tom de Vries  <tdevr...@suse.de>

	PR target/104364
	* gcc.target/nvptx/softstack-2.c: New test.
	* gcc.target/nvptx/uniform-simt-1.c: New test.

--- gcc/config/nvptx/nvptx-protos.h | 1 + gcc/config/nvptx/nvptx.cc | 25 +++++++++- gcc/config/nvptx/nvptx.md | 63 +++++++++++++------------ gcc/testsuite/gcc.target/nvptx/softstack-2.c | 11 +++++ gcc/testsuite/gcc.target/nvptx/uniform-simt-1.c | 18 +++++++ 5 files changed, 87 insertions(+), 31 deletions(-) diff --git a/gcc/config/nvptx/nvptx-protos.h b/gcc/config/nvptx/nvptx-protos.h index 3d6ad148cb4..a846e341917 100644 --- a/gcc/config/nvptx/nvptx-protos.h +++ b/gcc/config/nvptx/nvptx-protos.h @@ -59,5 +59,6 @@ extern const char *nvptx_output_simt_enter (rtx, rtx, rtx); extern const char *nvptx_output_simt_exit (rtx); extern const char *nvptx_output_red_partition (rtx, rtx); extern const char *nvptx_output_atomic_insn (const char *, rtx *, int, int); +extern bool nvptx_mem_local_p (rtx); #endif #endif diff --git a/gcc/config/nvptx/nvptx.cc b/gcc/config/nvptx/nvptx.cc index b3bb97c3c14..2a694926b7a 100644 --- a/gcc/config/nvptx/nvptx.cc +++ b/gcc/config/nvptx/nvptx.cc @@ -3150,7 +3150,8 @@ nvptx_reorg_uniform_simt () rtx pred = nvptx_get_unisimt_predicate (); pred = gen_rtx_NE (BImode, pred, const0_rtx); pat = gen_rtx_COND_EXEC (VOIDmode, pred, pat); - validate_change (insn, &PATTERN (insn), pat, false); + bool changed_p = validate_change (insn, &PATTERN (insn), pat, false); + gcc_assert (changed_p); } } @@ -6894,6 +6895,28 @@ nvptx_libc_has_function (enum function_class fn_class, tree type) return default_libc_has_function (fn_class, type); } +bool +nvptx_mem_local_p (rtx mem) +{ + gcc_assert (GET_CODE (mem) == MEM); + + struct address_info info; + decompose_mem_address (&info, mem); + + if (info.base != NULL && REG_P (*info.base) + && REGNO_PTR_FRAME_P (REGNO (*info.base))) + { + if (TARGET_SOFT_STACK) + { + /* Frame-related doesn't mean local. 
*/ + } + else + return true; + } + + return false; +} + #undef TARGET_OPTION_OVERRIDE #define TARGET_OPTION_OVERRIDE nvptx_option_override diff --git a/gcc/config/nvptx/nvptx.md b/gcc/config/nvptx/nvptx.md index 92768dd9e95..d64dbfd8b33 100644 --- a/gcc/config/nvptx/nvptx.md +++ b/gcc/config/nvptx/nvptx.md @@ -54,6 +54,7 @@ (define_c_enum "unspec" [ (define_c_enum "unspecv" [ UNSPECV_LOCK UNSPECV_CAS + UNSPECV_CAS_LOCAL UNSPECV_XCHG UNSPECV_BARSYNC UNSPECV_WARPSYNC @@ -1771,8 +1772,14 @@ (define_expand "atomic_compare_and_swap<mode>" (match_operand:SI 7 "const_int_operand")] ;; failure model "" { - emit_insn (gen_atomic_compare_and_swap<mode>_1 - (operands[1], operands[2], operands[3], operands[4], operands[6])); + if (nvptx_mem_local_p (operands[2])) + emit_insn (gen_atomic_compare_and_swap<mode>_1_local + (operands[1], operands[2], operands[3], operands[4], + operands[6])); + else + emit_insn (gen_atomic_compare_and_swap<mode>_1 + (operands[1], operands[2], operands[3], operands[4], + operands[6])); rtx cond = gen_reg_rtx (BImode); emit_move_insn (cond, gen_rtx_EQ (BImode, operands[1], operands[3])); @@ -1780,23 +1787,18 @@ (define_expand "atomic_compare_and_swap<mode>" DONE; }) -(define_insn "atomic_compare_and_swap<mode>_1" +(define_insn "atomic_compare_and_swap<mode>_1_local" [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R") (unspec_volatile:SDIM [(match_operand:SDIM 1 "memory_operand" "+m") (match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri") (match_operand:SDIM 3 "nvptx_nonmemory_operand" "Ri") (match_operand:SI 4 "const_int_operand")] - UNSPECV_CAS)) + UNSPECV_CAS_LOCAL)) (set (match_dup 1) - (unspec_volatile:SDIM [(const_int 0)] UNSPECV_CAS))] + (unspec_volatile:SDIM [(const_int 0)] UNSPECV_CAS_LOCAL))] "" { - struct address_info info; - decompose_mem_address (&info, operands[1]); - if (info.base != NULL && REG_P (*info.base) - && REGNO_PTR_FRAME_P (REGNO (*info.base))) - { output_asm_insn ("{", NULL); output_asm_insn ("\\t" ".reg.pred" "\\t" 
"%%eq_p;", NULL); output_asm_insn ("\\t" ".reg%t0" "\\t" "%%val;", operands); @@ -1807,13 +1809,26 @@ (define_insn "atomic_compare_and_swap<mode>_1" output_asm_insn ("\\t" "mov%t0" "\\t" "%0,%%val;", operands); output_asm_insn ("}", NULL); return ""; - } + } + [(set_attr "predicable" "false")]) + +(define_insn "atomic_compare_and_swap<mode>_1" + [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R") + (unspec_volatile:SDIM + [(match_operand:SDIM 1 "memory_operand" "+m") + (match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri") + (match_operand:SDIM 3 "nvptx_nonmemory_operand" "Ri") + (match_operand:SI 4 "const_int_operand")] + UNSPECV_CAS)) + (set (match_dup 1) + (unspec_volatile:SDIM [(const_int 0)] UNSPECV_CAS))] + "" + { const char *t - = "\\tatom%A1.cas.b%T0\\t%0, %1, %2, %3;"; + = "%.\\tatom%A1.cas.b%T0\\t%0, %1, %2, %3;"; return nvptx_output_atomic_insn (t, operands, 1, 4); } - [(set_attr "atomic" "true") - (set_attr "predicable" "false")]) + [(set_attr "atomic" "true")]) (define_insn "atomic_exchange<mode>" [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R") ;; output @@ -1825,10 +1840,7 @@ (define_insn "atomic_exchange<mode>" (match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri"))] ;; input "" { - struct address_info info; - decompose_mem_address (&info, operands[1]); - if (info.base != NULL && REG_P (*info.base) - && REGNO_PTR_FRAME_P (REGNO (*info.base))) + if (nvptx_mem_local_p (operands[1])) { output_asm_insn ("{", NULL); output_asm_insn ("\\t" ".reg%t0" "\\t" "%%val;", operands); @@ -1855,10 +1867,7 @@ (define_insn "atomic_fetch_add<mode>" (match_dup 1))] "" { - struct address_info info; - decompose_mem_address (&info, operands[1]); - if (info.base != NULL && REG_P (*info.base) - && REGNO_PTR_FRAME_P (REGNO (*info.base))) + if (nvptx_mem_local_p (operands[1])) { output_asm_insn ("{", NULL); output_asm_insn ("\\t" ".reg%t0" "\\t" "%%val;", operands); @@ -1888,10 +1897,7 @@ (define_insn "atomic_fetch_addsf" (match_dup 1))] "" { - struct 
address_info info; - decompose_mem_address (&info, operands[1]); - if (info.base != NULL && REG_P (*info.base) - && REGNO_PTR_FRAME_P (REGNO (*info.base))) + if (nvptx_mem_local_p (operands[1])) { output_asm_insn ("{", NULL); output_asm_insn ("\\t" ".reg%t0" "\\t" "%%val;", operands); @@ -1924,10 +1930,7 @@ (define_insn "atomic_fetch_<logic><mode>" (match_dup 1))] "<MODE>mode == SImode || TARGET_SM35" { - struct address_info info; - decompose_mem_address (&info, operands[1]); - if (info.base != NULL && REG_P (*info.base) - && REGNO_PTR_FRAME_P (REGNO (*info.base))) + if (nvptx_mem_local_p (operands[1])) { output_asm_insn ("{", NULL); output_asm_insn ("\\t" ".reg.b%T0" "\\t" "%%val;", operands); diff --git a/gcc/testsuite/gcc.target/nvptx/softstack-2.c b/gcc/testsuite/gcc.target/nvptx/softstack-2.c new file mode 100644 index 00000000000..cccfda947d7 --- /dev/null +++ b/gcc/testsuite/gcc.target/nvptx/softstack-2.c @@ -0,0 +1,11 @@ +/* { dg-options "-O2 -msoft-stack" } */ + +int +f (void) +{ + int a = 0; + return __sync_lock_test_and_set (&a, 1); +} + +/* { dg-final { scan-assembler-times "atom.exch" 1 } } */ + diff --git a/gcc/testsuite/gcc.target/nvptx/uniform-simt-1.c b/gcc/testsuite/gcc.target/nvptx/uniform-simt-1.c new file mode 100644 index 00000000000..1bc0adae014 --- /dev/null +++ b/gcc/testsuite/gcc.target/nvptx/uniform-simt-1.c @@ -0,0 +1,18 @@ +/* { dg-options "-O2 -muniform-simt" } */ + +enum memmodel +{ + MEMMODEL_RELAXED = 0, +}; + +int a = 0; + +int +f (void) +{ + int expected = 1; + return __atomic_compare_exchange_n (&a, &expected, 0, 0, MEMMODEL_RELAXED, + MEMMODEL_RELAXED); +} + +/* { dg-final { scan-assembler-times "@%r\[0-9\]*\tatom.global.cas" 1 } } */