If a 128-bit vector is naturally aligned, it cannot cross a cache-line boundary, so an LSX load of it is guaranteed to be atomic.  Thus we can use LSX to perform the atomic load lock-free, instead of falling back to a lock.
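
For illustration only (not part of the patch), a minimal C test case that should exercise the new expander on a 64-bit LoongArch target built with -mlsx; the _Atomic qualifier gives the 16-byte object the natural alignment the argument above relies on:

#include <stdatomic.h>

/* _Atomic __int128 is naturally aligned to 16 bytes, so it cannot cross a
   cache-line boundary and the vld emitted by atomic_loadti_lsx can read it
   atomically.  */
_Atomic __int128 shared;

__int128
read_shared (void)
{
  /* With this patch and -mlsx, this load is expected to expand via
     atomic_loadti to a vld plus two vpickve2gr.d extracts (plus a dbar
     for seq_cst), instead of a lock-based libatomic call.  */
  return atomic_load_explicit (&shared, memory_order_acquire);
}
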
gcc/ChangeLog:

	* config/loongarch/sync.md (atomic_loadti_lsx): New define_insn.
	(atomic_loadti): New define_expand.
---
 gcc/config/loongarch/sync.md | 41 ++++++++++++++++++++++++++++++++++++
 1 file changed, 41 insertions(+)

diff --git a/gcc/config/loongarch/sync.md b/gcc/config/loongarch/sync.md
index f1ab132163c..b0d3c854c56 100644
--- a/gcc/config/loongarch/sync.md
+++ b/gcc/config/loongarch/sync.md
@@ -135,6 +135,47 @@ (define_insn "atomic_load<mode>"
 }
   [(set (attr "length") (const_int 12))])
 
+(define_insn "atomic_loadti_lsx"
+  [(set (match_operand:V2DI 0 "register_operand" "=f")
+	(unspec_volatile:V2DI
+	  [(match_operand:TI 1 "memory_operand" "m")
+	   (match_operand:SI 2 "const_int_operand")]	;; model
+	  UNSPEC_ATOMIC_LOAD))]
+  "ISA_HAS_LSX && TARGET_64BIT"
+{
+  enum memmodel model = memmodel_base (INTVAL (operands[2]));
+
+  switch (model)
+    {
+    case MEMMODEL_SEQ_CST:
+      output_asm_insn ("dbar\t0x11", operands);
+      /* fall through */
+    case MEMMODEL_ACQUIRE:
+    case MEMMODEL_RELAXED:
+      return "vld\t%w0,%1\\n\\t%G2";
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set (attr "length") (const_int 12))])
+
+(define_expand "atomic_loadti"
+  [(match_operand:TI 0 "register_operand" "=r")
+   (match_operand:TI 1 "memory_operand" "m")
+   (match_operand:SI 2 "const_int_operand")]
+  "ISA_HAS_LSX && TARGET_64BIT"
+{
+  rtx vr = gen_reg_rtx (V2DImode);
+
+  emit_insn (gen_atomic_loadti_lsx (vr, operands[1], operands[2]));
+  for (int i = 0; i < 2; i++)
+    emit_insn (
+      gen_lsx_vpickve2gr_d (loongarch_subword (operands[0], i), vr,
+			    GEN_INT (i)));
+  DONE;
+})
+
 ;; Implement atomic stores with amoswap. Fall back to fences for atomic loads.
 (define_insn "atomic_store<mode>"
   [(set (match_operand:QHWD 0 "memory_operand" "=m")
-- 
2.48.1