This is required for LSE2, where the pair must be treated atomically if it does not cross a 16-byte boundary. But it simplifies the code to do this always; without LSE2, just use the unpaired atomicity.
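(For reference, a minimal C sketch of the boundary condition described above; this is not part of the patch, and lse2_pair_is_atomic, addr and size_bytes are names made up purely for illustration.)

#include <stdbool.h>
#include <stdint.h>

/*
 * Illustration only: under LSE2 a load/store pair is single-copy atomic
 * when all of its bytes fall within one 16-byte-aligned block, i.e. the
 * access does not cross a 16-byte boundary.
 */
static bool lse2_pair_is_atomic(uint64_t addr, unsigned size_bytes)
{
    uint64_t last = addr + 2 * size_bytes - 1;  /* last byte of the pair */

    return (addr >> 4) == (last >> 4);
}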
Signed-off-by: Richard Henderson <[email protected]>
---
 target/arm/translate-a64.c | 77 ++++++++++++++++++++++++++++++--------
 1 file changed, 61 insertions(+), 16 deletions(-)

diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index fa793485c3..c0d55c9204 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -3089,27 +3089,72 @@ static void disas_ldst_pair(DisasContext *s, uint32_t insn)
     } else {
         TCGv_i64 tcg_rt = cpu_reg(s, rt);
         TCGv_i64 tcg_rt2 = cpu_reg(s, rt2);
+        MemOp mop = (size + 1) | s->be_data;
+
+        /*
+         * With LSE2, non-sign-extending pairs are treated atomically if
+         * aligned, and if unaligned one of the pair will be completely
+         * within a 16-byte block and that element will be atomic.
+         * Otherwise each element is separately atomic.
+         * In all cases, issue one operation with the correct atomicity.
+         *
+         * This treats sign-extending loads like zero-extending loads,
+         * since that reuses the most code below.
+         */
+        mop |= size << MO_ATMAX_SHIFT;
+        mop |= s->atom_data;
+        if (s->align_mem) {
+            mop |= (size == 2 ? MO_ALIGN_4 : MO_ALIGN_8);
+        }
 
         if (is_load) {
-            TCGv_i64 tmp = tcg_temp_new_i64();
+            if (size == 2) {
+                TCGv_i64 tmp = tcg_temp_new_i64();
 
-            /* Do not modify tcg_rt before recognizing any exception
-             * from the second load.
-             */
-            do_gpr_ld(s, tmp, clean_addr, size + is_signed * MO_SIGN,
-                      false, false, 0, false, false);
-            tcg_gen_addi_i64(clean_addr, clean_addr, 1 << size);
-            do_gpr_ld(s, tcg_rt2, clean_addr, size + is_signed * MO_SIGN,
-                      false, false, 0, false, false);
+                tcg_gen_qemu_ld_i64(tmp, clean_addr, get_mem_index(s), mop);
+                if (s->be_data == MO_LE) {
+                    tcg_gen_extr32_i64(tcg_rt, tcg_rt2, tmp);
+                } else {
+                    tcg_gen_extr32_i64(tcg_rt2, tcg_rt, tmp);
+                }
+                if (is_signed) {
+                    tcg_gen_ext32s_i64(tcg_rt, tcg_rt);
+                    tcg_gen_ext32s_i64(tcg_rt2, tcg_rt2);
+                }
+                tcg_temp_free_i64(tmp);
+            } else {
+                TCGv_i128 tmp = tcg_temp_new_i128();
 
-            tcg_gen_mov_i64(tcg_rt, tmp);
-            tcg_temp_free_i64(tmp);
+                tcg_gen_qemu_ld_i128(tmp, clean_addr, get_mem_index(s), mop);
+                if (s->be_data == MO_LE) {
+                    tcg_gen_extr_i128_i64(tcg_rt, tcg_rt2, tmp);
+                } else {
+                    tcg_gen_extr_i128_i64(tcg_rt2, tcg_rt, tmp);
+                }
+                tcg_temp_free_i128(tmp);
+            }
         } else {
-            do_gpr_st(s, tcg_rt, clean_addr, size,
-                      false, 0, false, false);
-            tcg_gen_addi_i64(clean_addr, clean_addr, 1 << size);
-            do_gpr_st(s, tcg_rt2, clean_addr, size,
-                      false, 0, false, false);
+            if (size == 2) {
+                TCGv_i64 tmp = tcg_temp_new_i64();
+
+                if (s->be_data == MO_LE) {
+                    tcg_gen_concat32_i64(tmp, tcg_rt, tcg_rt2);
+                } else {
+                    tcg_gen_concat32_i64(tmp, tcg_rt2, tcg_rt);
+                }
+                tcg_gen_qemu_st_i64(tmp, clean_addr, get_mem_index(s), mop);
+                tcg_temp_free_i64(tmp);
+            } else {
+                TCGv_i128 tmp = tcg_temp_new_i128();
+
+                if (s->be_data == MO_LE) {
+                    tcg_gen_concat_i64_i128(tmp, tcg_rt, tcg_rt2);
+                } else {
+                    tcg_gen_concat_i64_i128(tmp, tcg_rt2, tcg_rt);
+                }
+                tcg_gen_qemu_st_i128(tmp, clean_addr, get_mem_index(s), mop);
+                tcg_temp_free_i128(tmp);
+            }
         }
     }
 
-- 
2.34.1
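(Note, not part of the patch: the register-pair packing done above by tcg_gen_concat_i64_i128/tcg_gen_extr_i128_i64 behaves like the following host-side sketch. pack_pair, unpack_pair and the u128 struct are illustrative names, not QEMU APIs: for little-endian data the first register of the pair occupies the low 64 bits of the 128-bit access, for big-endian the high 64 bits.)

#include <stdbool.h>
#include <stdint.h>

/* Illustrative 128-bit value split into two 64-bit halves. */
typedef struct { uint64_t lo, hi; } u128;

/* Store side: combine the register pair into one 128-bit value. */
static u128 pack_pair(uint64_t rt, uint64_t rt2, bool big_endian)
{
    u128 v;

    if (!big_endian) {
        /* mirrors tcg_gen_concat_i64_i128(tmp, tcg_rt, tcg_rt2) */
        v.lo = rt;
        v.hi = rt2;
    } else {
        /* mirrors tcg_gen_concat_i64_i128(tmp, tcg_rt2, tcg_rt) */
        v.lo = rt2;
        v.hi = rt;
    }
    return v;
}

/* Load side: split one 128-bit load back into the register pair. */
static void unpack_pair(u128 v, uint64_t *rt, uint64_t *rt2, bool big_endian)
{
    if (!big_endian) {
        /* mirrors tcg_gen_extr_i128_i64(tcg_rt, tcg_rt2, tmp) */
        *rt = v.lo;
        *rt2 = v.hi;
    } else {
        /* mirrors tcg_gen_extr_i128_i64(tcg_rt2, tcg_rt, tmp) */
        *rt = v.hi;
        *rt2 = v.lo;
    }
}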
