This allows us to enforce 16- and 64-byte alignment without any extra overhead.
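
Concretely, instead of emitting an explicit alignment check before an
unconstrained memory operation, the alignment requirement is folded into the
memory operation itself.  As an illustrative before/after sketch (drawn from
the gen_ldda_asi hunk below, not an additional change):

    /* Before: separate check, then an unconstrained load.  */
    gen_check_align(addr, 15);
    tcg_gen_qemu_ld_i64(hi, addr, da.mem_idx, da.memop);

    /* After: the 16-byte alignment requirement is folded into the memop,
       so it is checked as part of the memory operation itself.  */
    tcg_gen_qemu_ld_i64(hi, addr, da.mem_idx, da.memop | MO_ALIGN_16);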
Signed-off-by: Richard Henderson <r...@twiddle.net>
---
This patch is dependent on my sparc improvements branch, along with
Sergey's alignment improvement patch.  A buildable tree is at

  git://github.com/rth7680/qemu.git tgt-sparc-tmp

r~

 target-sparc/translate.c | 24 +++++++++++++++---------
 1 file changed, 15 insertions(+), 9 deletions(-)

diff --git a/target-sparc/translate.c b/target-sparc/translate.c
index 28416fa..f384cbf 100644
--- a/target-sparc/translate.c
+++ b/target-sparc/translate.c
@@ -2377,6 +2377,7 @@ static void gen_ldf_asi(DisasContext *dc, TCGv addr,
             tcg_gen_qemu_ld_i64(cpu_fpr[rd / 2], addr, da.mem_idx, da.memop);
             break;
         case 16:
+            /* Only 8 byte alignment required, which is automatic here.  */
             tcg_gen_qemu_ld_i64(cpu_fpr[rd / 2], addr, da.mem_idx, da.memop);
             tcg_gen_addi_tl(addr, addr, 8);
             tcg_gen_qemu_ld_i64(cpu_fpr[rd/2+1], addr, da.mem_idx, da.memop);
@@ -2389,20 +2390,23 @@ static void gen_ldf_asi(DisasContext *dc, TCGv addr,
     case GET_ASI_BLOCK:
         /* Valid for lddfa on aligned registers only.  */
         if (size == 8 && (rd & 7) == 0) {
+            TCGMemOp memop;
             TCGv eight;
             int i;
 
-            gen_check_align(addr, 0x3f);
             gen_address_mask(dc, addr);
 
+            /* The first operation checks required alignment.  */
+            memop = da.memop | MO_ALIGN_64;
             eight = tcg_const_tl(8);
             for (i = 0; ; ++i) {
                 tcg_gen_qemu_ld_i64(cpu_fpr[rd / 2 + i], addr,
-                                    da.mem_idx, da.memop);
+                                    da.mem_idx, memop);
                 if (i == 7) {
                     break;
                 }
                 tcg_gen_add_tl(addr, addr, eight);
+                memop = da.memop;
             }
             tcg_temp_free(eight);
         } else {
@@ -2445,6 +2449,7 @@ static void gen_ldf_asi(DisasContext *dc, TCGv addr,
                 gen_helper_ld_asi(cpu_fpr[rd / 2], cpu_env, addr, r_asi, r_mop);
                 break;
             case 16:
+                /* Only 8 byte alignment required, which is automatic here.  */
                 gen_helper_ld_asi(cpu_fpr[rd / 2], cpu_env, addr, r_asi, r_mop);
                 tcg_gen_addi_tl(addr, addr, 8);
                 gen_helper_ld_asi(cpu_fpr[rd/2+1], cpu_env, addr, r_asi, r_mop);
@@ -2480,6 +2485,7 @@ static void gen_stf_asi(DisasContext *dc, TCGv addr,
             tcg_gen_qemu_st_i64(cpu_fpr[rd / 2], addr, da.mem_idx, da.memop);
             break;
         case 16:
+            /* Only 8 byte alignment required, which is automatic here.  */
            tcg_gen_qemu_st_i64(cpu_fpr[rd / 2], addr, da.mem_idx, da.memop);
             tcg_gen_addi_tl(addr, addr, 8);
             tcg_gen_qemu_st_i64(cpu_fpr[rd/2+1], addr, da.mem_idx, da.memop);
@@ -2492,20 +2498,23 @@ static void gen_stf_asi(DisasContext *dc, TCGv addr,
     case GET_ASI_BLOCK:
         /* Valid for stdfa on aligned registers only.  */
         if (size == 8 && (rd & 7) == 0) {
+            TCGMemOp memop;
             TCGv eight;
             int i;
 
-            gen_check_align(addr, 0x3f);
             gen_address_mask(dc, addr);
 
+            /* The first operation checks required alignment.  */
+            memop = da.memop | MO_ALIGN_64;
             eight = tcg_const_tl(8);
             for (i = 0; ; ++i) {
                 tcg_gen_qemu_st_i64(cpu_fpr[rd / 2 + i], addr,
-                                    da.mem_idx, da.memop);
+                                    da.mem_idx, memop);
                 if (i == 7) {
                     break;
                 }
                 tcg_gen_add_tl(addr, addr, eight);
+                memop = da.memop;
             }
             tcg_temp_free(eight);
         } else {
@@ -2543,9 +2552,8 @@ static void gen_ldda_asi(DisasContext *dc, TCGv addr, int insn, int rd)
         return;
 
     case GET_ASI_DTWINX:
-        gen_check_align(addr, 15);
         gen_address_mask(dc, addr);
-        tcg_gen_qemu_ld_i64(hi, addr, da.mem_idx, da.memop);
+        tcg_gen_qemu_ld_i64(hi, addr, da.mem_idx, da.memop | MO_ALIGN_16);
         tcg_gen_addi_tl(addr, addr, 8);
         tcg_gen_qemu_ld_i64(lo, addr, da.mem_idx, da.memop);
         break;
@@ -2598,9 +2606,8 @@ static void gen_stda_asi(DisasContext *dc, TCGv hi, TCGv addr,
         break;
 
     case GET_ASI_DTWINX:
-        gen_check_align(addr, 15);
         gen_address_mask(dc, addr);
-        tcg_gen_qemu_st_i64(hi, addr, da.mem_idx, da.memop);
+        tcg_gen_qemu_st_i64(hi, addr, da.mem_idx, da.memop | MO_ALIGN_16);
         tcg_gen_addi_tl(addr, addr, 8);
         tcg_gen_qemu_st_i64(lo, addr, da.mem_idx, da.memop);
         break;
@@ -5469,7 +5476,6 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                     if (gen_trap_ifnofpu(dc)) {
                         goto jmp_insn;
                     }
-                    gen_check_align(cpu_addr, 7);
                     gen_stf_asi(dc, cpu_addr, insn, 16, QFPREG(rd));
                 }
                 break;
-- 
2.5.5