The other subword atomic patterns use riscv_subword_address to calculate the aligned address, shift amount, mask, and !mask. atomic_test_and_set was implemented before that common helper was added. After this patch, all subword atomic patterns use riscv_subword_address.
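For context, the address arithmetic riscv_subword_address performs for a one-byte access boils down to something like the C sketch below (hypothetical names; the real helper emits rtx expressions rather than operating on plain integers, and this assumes a QImode subword on little-endian RISC-V). It mirrors the andi/slliw sequence visible in the assembly listings further down:

#include <stdint.h>

/* Sketch: from the address of a byte-sized flag, derive the enclosing
   32-bit-aligned word address, the bit shift of the byte within that
   word, the mask selecting that byte, and the inverted mask.  */
static void
subword_address_sketch (uintptr_t addr, uintptr_t *aligned_addr,
                        unsigned *shift, uint32_t *mask, uint32_t *not_mask)
{
  *aligned_addr = addr & ~(uintptr_t) 3;  /* andi  aN,aN,-4 */
  *shift = (addr & 3) * 8;                /* andi + slliw: byte offset * 8 */
  *mask = (uint32_t) 0xff << *shift;      /* selects the target byte lane */
  *not_mask = ~*mask;                     /* everything but that byte */
}

atomic_test_and_set only needs the aligned address and the shift: the 1 is shifted into place, amoor.w ORs it in, and the old byte is shifted back down. That is why the mask/!mask outputs are unused in the diff below.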
gcc/ChangeLog:

	* config/riscv/sync.md: Use riscv_subword_address function to
	calculate the address and shift in atomic_test_and_set.

Signed-off-by: Patrick O'Neill <patr...@rivosinc.com>
---
Tested using r14-5040-g5dc2ba333f8.

This patch causes this codegen to regress (adds a mv) but *only* on -O0.

extern void abort();

short x;

int main() {
  if (__atomic_test_and_set(&x, __ATOMIC_SEQ_CST))
    abort();
}

Baseline:

main:
        addi    sp,sp,-16
        sd      ra,8(sp)
        sd      s0,0(sp)
        addi    s0,sp,16
        lui     a5,%hi(x)
        addi    a5,a5,%lo(x)
        andi    a4,a5,-4
        andi    a5,a5,3
        li      a3,1
        slliw   a5,a5,3
        sllw    a2,a3,a5
        amoor.w.aqrl    a3,a2,0(a4)
        srlw    a5,a3,a5
        andi    a5,a5,0xff
        beq     a5,zero,.L2
        call    abort
.L2:
        li      a5,0
        mv      a0,a5
        ld      ra,8(sp)
        ld      s0,0(sp)
        addi    sp,sp,16
        jr      ra

After the patch there is an additional mv:

main:
        addi    sp,sp,-16
        sd      ra,8(sp)
        sd      s0,0(sp)
        addi    s0,sp,16
        lui     a5,%hi(x)
        addi    a5,a5,%lo(x)
        andi    a3,a5,-4
        andi    a5,a5,3
        slliw   a5,a5,3
        li      a4,1
        sllw    a2,a4,a5
        amoor.w.aqrl    a4,a2,0(a3)
        sraw    a4,a4,a5
>       mv      a5,a4
        andi    a5,a5,0xff
        beq     a5,zero,.L2
        call    abort
.L2:
        li      a5,0
        mv      a0,a5
        ld      ra,8(sp)
        ld      s0,0(sp)
        addi    sp,sp,16
        jr      ra

This can be fixed using:

diff --git a/gcc/config/riscv/sync.md b/gcc/config/riscv/sync.md
index ad4751febd2..a9539977321 100644
--- a/gcc/config/riscv/sync.md
+++ b/gcc/config/riscv/sync.md
@@ -530,10 +530,9 @@
   emit_insn (gen_atomic_fetch_orsi (old, aligned_mem, shifted_set, model));

-  emit_move_insn (old, gen_rtx_ASHIFTRT (SImode, old,
-                                         gen_lowpart (QImode, shift)));
-
-  emit_move_insn (operands[0], gen_lowpart (QImode, old));
+  emit_move_insn (gen_lowpart (SImode, operands[0]),
+                  gen_rtx_ASHIFTRT (SImode, old,
+                                    gen_lowpart (QImode, shift)));

   DONE;
 })

But I think it hurts the readability/grokability of the .md sequence. If
it's worth changing for -O0 generated sequences, let me know and I'll send
a follow-up patch.
---
 gcc/config/riscv/sync.md | 41 +++++++++++++++++-----------------------
 1 file changed, 17 insertions(+), 24 deletions(-)

diff --git a/gcc/config/riscv/sync.md b/gcc/config/riscv/sync.md
index 6ff3493b5ce..ad4751febd2 100644
--- a/gcc/config/riscv/sync.md
+++ b/gcc/config/riscv/sync.md
@@ -504,43 +504,36 @@
   (set (attr "length") (const_int 28))])

 (define_expand "atomic_test_and_set"
-  [(match_operand:QI 0 "register_operand" "")    ;; bool output
+  [(match_operand:QI 0 "register_operand" "")     ;; bool output
    (match_operand:QI 1 "memory_operand" "+A")    ;; memory
-   (match_operand:SI 2 "const_int_operand" "")]  ;; model
+   (match_operand:SI 2 "const_int_operand" "")]   ;; model
   "TARGET_ATOMIC"
 {
   /* We have no QImode atomics, so use the address LSBs to form a mask,
      then use an aligned SImode atomic.  */
-  rtx result = operands[0];
+  rtx old = gen_reg_rtx (SImode);
   rtx mem = operands[1];
   rtx model = operands[2];
-  rtx addr = force_reg (Pmode, XEXP (mem, 0));
-
-  rtx aligned_addr = gen_reg_rtx (Pmode);
-  emit_move_insn (aligned_addr, gen_rtx_AND (Pmode, addr, GEN_INT (-4)));
+  rtx set = gen_reg_rtx (QImode);
+  rtx aligned_mem = gen_reg_rtx (SImode);
+  rtx shift = gen_reg_rtx (SImode);

-  rtx aligned_mem = change_address (mem, SImode, aligned_addr);
-  set_mem_alias_set (aligned_mem, 0);
+  /* Unused.  */
+  rtx _mask = gen_reg_rtx (SImode);
+  rtx _not_mask = gen_reg_rtx (SImode);

-  rtx offset = gen_reg_rtx (SImode);
-  emit_move_insn (offset, gen_rtx_AND (SImode, gen_lowpart (SImode, addr),
-                                       GEN_INT (3)));
+  riscv_subword_address (mem, &aligned_mem, &shift, &_mask, &_not_mask);

-  rtx tmp = gen_reg_rtx (SImode);
-  emit_move_insn (tmp, GEN_INT (1));
+  emit_move_insn (set, GEN_INT (1));
+  rtx shifted_set = gen_reg_rtx (SImode);
+  riscv_lshift_subword (QImode, set, shift, &shifted_set);

-  rtx shmt = gen_reg_rtx (SImode);
-  emit_move_insn (shmt, gen_rtx_ASHIFT (SImode, offset, GEN_INT (3)));
+  emit_insn (gen_atomic_fetch_orsi (old, aligned_mem, shifted_set, model));

-  rtx word = gen_reg_rtx (SImode);
-  emit_move_insn (word, gen_rtx_ASHIFT (SImode, tmp,
-                                        gen_lowpart (QImode, shmt)));
+  emit_move_insn (old, gen_rtx_ASHIFTRT (SImode, old,
+                                         gen_lowpart (QImode, shift)));

-  tmp = gen_reg_rtx (SImode);
-  emit_insn (gen_atomic_fetch_orsi (tmp, aligned_mem, word, model));
+  emit_move_insn (operands[0], gen_lowpart (QImode, old));

-  emit_move_insn (gen_lowpart (SImode, result),
-                  gen_rtx_LSHIFTRT (SImode, tmp,
-                                    gen_lowpart (QImode, shmt)));
   DONE;
 })
--
2.34.1