Applying: target-i386: Use mulu2 and muls2
error: patch failed: target-i386/helper.h:14
error: target-i386/helper.h: patch does not apply
error: patch failed: target-i386/int_helper.c:374
error: target-i386/int_helper.c: patch does not apply
error: patch failed: target-i386/translate.c:4111
error: target-i386/translate.c: patch does not apply
Patch failed at 0011 target-i386: Use mulu2 and muls2
On Wed, Feb 20, 2013 at 7:51 AM, Richard Henderson <r...@twiddle.net> wrote: > These correspond very closely to the insns that we're emulating. > > Signed-off-by: Richard Henderson <r...@twiddle.net> > --- > target-i386/helper.h | 4 -- > target-i386/int_helper.c | 40 ------------ > target-i386/translate.c | 167 > ++++++++++++++++------------------------------- > 3 files changed, 56 insertions(+), 155 deletions(-) > > diff --git a/target-i386/helper.h b/target-i386/helper.h > index 26a0cc8..d6974df 100644 > --- a/target-i386/helper.h > +++ b/target-i386/helper.h > @@ -14,12 +14,8 @@ DEF_HELPER_2(idivw_AX, void, env, tl) > DEF_HELPER_2(divl_EAX, void, env, tl) > DEF_HELPER_2(idivl_EAX, void, env, tl) > #ifdef TARGET_X86_64 > -DEF_HELPER_2(mulq_EAX_T0, void, env, tl) > -DEF_HELPER_2(imulq_EAX_T0, void, env, tl) > -DEF_HELPER_3(imulq_T0_T1, tl, env, tl, tl) > DEF_HELPER_2(divq_EAX, void, env, tl) > DEF_HELPER_2(idivq_EAX, void, env, tl) > -DEF_HELPER_FLAGS_2(umulh, TCG_CALL_NO_RWG_SE, tl, tl, tl) > #endif > > DEF_HELPER_2(aam, void, env, int) > diff --git a/target-i386/int_helper.c b/target-i386/int_helper.c > index 3b56075..74c7c36 100644 > --- a/target-i386/int_helper.c > +++ b/target-i386/int_helper.c > @@ -374,46 +374,6 @@ static int idiv64(uint64_t *plow, uint64_t *phigh, > int64_t b) > return 0; > } > > -void helper_mulq_EAX_T0(CPUX86State *env, target_ulong t0) > -{ > - uint64_t r0, r1; > - > - mulu64(&r0, &r1, EAX, t0); > - EAX = r0; > - EDX = r1; > - CC_DST = r0; > - CC_SRC = r1; > -} > - > -target_ulong helper_umulh(target_ulong t0, target_ulong t1) > -{ > - uint64_t h, l; > - mulu64(&l, &h, t0, t1); > - return h; > -} > - > -void helper_imulq_EAX_T0(CPUX86State *env, target_ulong t0) > -{ > - uint64_t r0, r1; > - > - muls64(&r0, &r1, EAX, t0); > - EAX = r0; > - EDX = r1; > - CC_DST = r0; > - CC_SRC = ((int64_t)r1 != ((int64_t)r0 >> 63)); > -} > - > -target_ulong helper_imulq_T0_T1(CPUX86State *env, target_ulong t0, > - target_ulong t1) > -{ > - uint64_t r0, 
r1; > - > - muls64(&r0, &r1, t0, t1); > - CC_DST = r0; > - CC_SRC = ((int64_t)r1 != ((int64_t)r0 >> 63)); > - return r0; > -} > - > void helper_divq_EAX(CPUX86State *env, target_ulong t0) > { > uint64_t r0, r1; > diff --git a/target-i386/translate.c b/target-i386/translate.c > index 439d19e..1545e3f 100644 > --- a/target-i386/translate.c > +++ b/target-i386/translate.c > @@ -4111,31 +4111,18 @@ static void gen_sse(CPUX86State *env, DisasContext > *s, int b, > ot = s->dflag == 2 ? OT_QUAD : OT_LONG; > gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0); > switch (ot) { > - TCGv_i64 t0, t1; > default: > - t0 = tcg_temp_new_i64(); > - t1 = tcg_temp_new_i64(); > -#ifdef TARGET_X86_64 > - tcg_gen_ext32u_i64(t0, cpu_T[0]); > - tcg_gen_ext32u_i64(t1, cpu_regs[R_EDX]); > -#else > - tcg_gen_extu_i32_i64(t0, cpu_T[0]); > - tcg_gen_extu_i32_i64(t0, cpu_regs[R_EDX]); > -#endif > - tcg_gen_mul_i64(t0, t0, t1); > - tcg_gen_trunc_i64_tl(cpu_T[0], t0); > - tcg_gen_shri_i64(t0, t0, 32); > - tcg_gen_trunc_i64_tl(cpu_T[1], t0); > - tcg_temp_free_i64(t0); > - tcg_temp_free_i64(t1); > - gen_op_mov_reg_T0(OT_LONG, s->vex_v); > - gen_op_mov_reg_T1(OT_LONG, reg); > + tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]); > + tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_regs[R_EDX]); > + tcg_gen_mulu2_i32(cpu_tmp2_i32, cpu_tmp3_i32, > + cpu_tmp2_i32, cpu_tmp3_i32); > + tcg_gen_extu_i32_tl(cpu_regs[s->vex_v], cpu_tmp2_i32); > + tcg_gen_extu_i32_tl(cpu_regs[reg], cpu_tmp3_i32); > break; > #ifdef TARGET_X86_64 > case OT_QUAD: > - tcg_gen_mov_tl(cpu_T[1], cpu_regs[R_EDX]); > - tcg_gen_mul_tl(cpu_regs[s->vex_v], cpu_T[0], cpu_T[1]); > - gen_helper_umulh(cpu_regs[reg], cpu_T[0], cpu_T[1]); > + tcg_gen_mulu2_i64(cpu_regs[s->vex_v], cpu_regs[reg], > + cpu_T[0], cpu_regs[R_EDX]); > break; > #endif > } > @@ -5034,39 +5021,22 @@ static target_ulong disas_insn(CPUX86State *env, > DisasContext *s, > break; > default: > case OT_LONG: > -#ifdef TARGET_X86_64 > - gen_op_mov_TN_reg(OT_LONG, 1, R_EAX); > - 
tcg_gen_ext32u_tl(cpu_T[0], cpu_T[0]); > - tcg_gen_ext32u_tl(cpu_T[1], cpu_T[1]); > - tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]); > - gen_op_mov_reg_T0(OT_LONG, R_EAX); > - tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); > - tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 32); > - gen_op_mov_reg_T0(OT_LONG, R_EDX); > - tcg_gen_mov_tl(cpu_cc_src, cpu_T[0]); > -#else > - { > - TCGv_i64 t0, t1; > - t0 = tcg_temp_new_i64(); > - t1 = tcg_temp_new_i64(); > - gen_op_mov_TN_reg(OT_LONG, 1, R_EAX); > - tcg_gen_extu_i32_i64(t0, cpu_T[0]); > - tcg_gen_extu_i32_i64(t1, cpu_T[1]); > - tcg_gen_mul_i64(t0, t0, t1); > - tcg_gen_trunc_i64_i32(cpu_T[0], t0); > - gen_op_mov_reg_T0(OT_LONG, R_EAX); > - tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); > - tcg_gen_shri_i64(t0, t0, 32); > - tcg_gen_trunc_i64_i32(cpu_T[0], t0); > - gen_op_mov_reg_T0(OT_LONG, R_EDX); > - tcg_gen_mov_tl(cpu_cc_src, cpu_T[0]); > - } > -#endif > + tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]); > + tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_regs[R_EAX]); > + tcg_gen_mulu2_i32(cpu_tmp2_i32, cpu_tmp3_i32, > + cpu_tmp2_i32, cpu_tmp3_i32); > + tcg_gen_extu_i32_tl(cpu_regs[R_EAX], cpu_tmp2_i32); > + tcg_gen_extu_i32_tl(cpu_regs[R_EDX], cpu_tmp3_i32); > + tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]); > + tcg_gen_mov_tl(cpu_cc_src, cpu_regs[R_EDX]); > set_cc_op(s, CC_OP_MULL); > break; > #ifdef TARGET_X86_64 > case OT_QUAD: > - gen_helper_mulq_EAX_T0(cpu_env, cpu_T[0]); > + tcg_gen_mulu2_i64(cpu_regs[R_EAX], cpu_regs[R_EDX], > + cpu_T[0], cpu_regs[R_EAX]); > + tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]); > + tcg_gen_mov_tl(cpu_cc_src, cpu_regs[R_EDX]); > set_cc_op(s, CC_OP_MULQ); > break; > #endif > @@ -5102,41 +5072,25 @@ static target_ulong disas_insn(CPUX86State *env, > DisasContext *s, > break; > default: > case OT_LONG: > -#ifdef TARGET_X86_64 > - gen_op_mov_TN_reg(OT_LONG, 1, R_EAX); > - tcg_gen_ext32s_tl(cpu_T[0], cpu_T[0]); > - tcg_gen_ext32s_tl(cpu_T[1], cpu_T[1]); > - tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]); > - gen_op_mov_reg_T0(OT_LONG, 
R_EAX); > - tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); > - tcg_gen_ext32s_tl(cpu_tmp0, cpu_T[0]); > - tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0); > - tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 32); > - gen_op_mov_reg_T0(OT_LONG, R_EDX); > -#else > - { > - TCGv_i64 t0, t1; > - t0 = tcg_temp_new_i64(); > - t1 = tcg_temp_new_i64(); > - gen_op_mov_TN_reg(OT_LONG, 1, R_EAX); > - tcg_gen_ext_i32_i64(t0, cpu_T[0]); > - tcg_gen_ext_i32_i64(t1, cpu_T[1]); > - tcg_gen_mul_i64(t0, t0, t1); > - tcg_gen_trunc_i64_i32(cpu_T[0], t0); > - gen_op_mov_reg_T0(OT_LONG, R_EAX); > - tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); > - tcg_gen_sari_tl(cpu_tmp0, cpu_T[0], 31); > - tcg_gen_shri_i64(t0, t0, 32); > - tcg_gen_trunc_i64_i32(cpu_T[0], t0); > - gen_op_mov_reg_T0(OT_LONG, R_EDX); > - tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0); > - } > -#endif > + tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]); > + tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_regs[R_EAX]); > + tcg_gen_muls2_i32(cpu_tmp2_i32, cpu_tmp3_i32, > + cpu_tmp2_i32, cpu_tmp3_i32); > + tcg_gen_extu_i32_tl(cpu_regs[R_EAX], cpu_tmp2_i32); > + tcg_gen_extu_i32_tl(cpu_regs[R_EDX], cpu_tmp3_i32); > + tcg_gen_sari_i32(cpu_tmp2_i32, cpu_tmp2_i32, 31); > + tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]); > + tcg_gen_sub_i32(cpu_tmp2_i32, cpu_tmp2_i32, cpu_tmp3_i32); > + tcg_gen_extu_i32_tl(cpu_cc_src, cpu_tmp2_i32); > set_cc_op(s, CC_OP_MULL); > break; > #ifdef TARGET_X86_64 > case OT_QUAD: > - gen_helper_imulq_EAX_T0(cpu_env, cpu_T[0]); > + tcg_gen_muls2_i64(cpu_regs[R_EAX], cpu_regs[R_EDX], > + cpu_T[0], cpu_regs[R_EAX]); > + tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]); > + tcg_gen_sari_tl(cpu_cc_src, cpu_regs[R_EAX], 63); > + tcg_gen_sub_tl(cpu_cc_src, cpu_cc_src, cpu_regs[R_EDX]); > set_cc_op(s, CC_OP_MULQ); > break; > #endif > @@ -5391,37 +5345,27 @@ static target_ulong disas_insn(CPUX86State *env, > DisasContext *s, > } else { > gen_op_mov_TN_reg(ot, 1, reg); > } > - > -#ifdef TARGET_X86_64 > - if (ot == OT_QUAD) { > - gen_helper_imulq_T0_T1(cpu_T[0], 
cpu_env, cpu_T[0], cpu_T[1]); > - } else > -#endif > - if (ot == OT_LONG) { > + switch (ot) { > #ifdef TARGET_X86_64 > - tcg_gen_ext32s_tl(cpu_T[0], cpu_T[0]); > - tcg_gen_ext32s_tl(cpu_T[1], cpu_T[1]); > - tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]); > - tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); > - tcg_gen_ext32s_tl(cpu_tmp0, cpu_T[0]); > - tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0); > -#else > - { > - TCGv_i64 t0, t1; > - t0 = tcg_temp_new_i64(); > - t1 = tcg_temp_new_i64(); > - tcg_gen_ext_i32_i64(t0, cpu_T[0]); > - tcg_gen_ext_i32_i64(t1, cpu_T[1]); > - tcg_gen_mul_i64(t0, t0, t1); > - tcg_gen_trunc_i64_i32(cpu_T[0], t0); > - tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); > - tcg_gen_sari_tl(cpu_tmp0, cpu_T[0], 31); > - tcg_gen_shri_i64(t0, t0, 32); > - tcg_gen_trunc_i64_i32(cpu_T[1], t0); > - tcg_gen_sub_tl(cpu_cc_src, cpu_T[1], cpu_tmp0); > - } > + case OT_QUAD: > + tcg_gen_muls2_i64(cpu_regs[reg], cpu_T[1], cpu_T[0], cpu_T[1]); > + tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[reg]); > + tcg_gen_sari_tl(cpu_cc_src, cpu_cc_dst, 63); > + tcg_gen_sub_tl(cpu_cc_src, cpu_cc_src, cpu_T[1]); > + break; > #endif > - } else { > + case OT_LONG: > + tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]); > + tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T[1]); > + tcg_gen_muls2_i32(cpu_tmp2_i32, cpu_tmp3_i32, > + cpu_tmp2_i32, cpu_tmp3_i32); > + tcg_gen_extu_i32_tl(cpu_regs[reg], cpu_tmp2_i32); > + tcg_gen_sari_i32(cpu_tmp2_i32, cpu_tmp2_i32, 31); > + tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[reg]); > + tcg_gen_sub_i32(cpu_tmp2_i32, cpu_tmp2_i32, cpu_tmp3_i32); > + tcg_gen_extu_i32_tl(cpu_cc_src, cpu_tmp2_i32); > + break; > + default: > tcg_gen_ext16s_tl(cpu_T[0], cpu_T[0]); > tcg_gen_ext16s_tl(cpu_T[1], cpu_T[1]); > /* XXX: use 32 bit mul which could be faster */ > @@ -5429,8 +5373,9 @@ static target_ulong disas_insn(CPUX86State *env, > DisasContext *s, > tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); > tcg_gen_ext16s_tl(cpu_tmp0, cpu_T[0]); > tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0); > + gen_op_mov_reg_T0(ot, 
reg); > + break; > } > - gen_op_mov_reg_T0(ot, reg); > set_cc_op(s, CC_OP_MULB + ot); > break; > case 0x1c0: > -- > 1.8.1.2 >