On 16 May 2018 at 16:52, Richard Henderson <richard.hender...@linaro.org> wrote: > diff --git a/target/arm/translate.c b/target/arm/translate.c > index 731cf327a1..613598d090 100644 > --- a/target/arm/translate.c > +++ b/target/arm/translate.c
Just noticed, but in the 32-bit translator where the argument to get_fpstatus_ptr() is "is this neon?" (i.e. "do we use the standard FPSCR value"), shouldn't we be passing 'true' to get_fpstatus_ptr() for the half-precision conversions in disas_neon_data_insn()? I haven't tested, but I imagine that otherwise you get the wrong results if the input is a denormal and FPSCR.FZ is 0 or if the output should be a NaN and FPSCR.DN is 0. > @@ -7222,53 +7247,70 @@ static int disas_neon_data_insn(DisasContext *s, > uint32_t insn) > } > break; > case NEON_2RM_VCVT_F16_F32: > + { > + TCGv_ptr fpst; > + TCGv_i32 ahp; > + > if (!arm_dc_feature(s, ARM_FEATURE_VFP_FP16) || > q || (rm & 1)) { > return 1; > } > tmp = tcg_temp_new_i32(); > tmp2 = tcg_temp_new_i32(); > + fpst = get_fpstatus_ptr(false); > + ahp = get_ahp_flag(); > tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 0)); > - gen_helper_neon_fcvt_f32_to_f16(tmp, cpu_F0s, cpu_env); > + gen_helper_vfp_fcvt_f32_to_f16(tmp, cpu_F0s, fpst, ahp); > tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 1)); > - gen_helper_neon_fcvt_f32_to_f16(tmp2, cpu_F0s, cpu_env); > + gen_helper_vfp_fcvt_f32_to_f16(tmp2, cpu_F0s, fpst, ahp); > tcg_gen_shli_i32(tmp2, tmp2, 16); > tcg_gen_or_i32(tmp2, tmp2, tmp); > tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 2)); > - gen_helper_neon_fcvt_f32_to_f16(tmp, cpu_F0s, cpu_env); > + gen_helper_vfp_fcvt_f32_to_f16(tmp, cpu_F0s, fpst, ahp); > tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 3)); > neon_store_reg(rd, 0, tmp2); > tmp2 = tcg_temp_new_i32(); > - gen_helper_neon_fcvt_f32_to_f16(tmp2, cpu_F0s, cpu_env); > + gen_helper_vfp_fcvt_f32_to_f16(tmp2, cpu_F0s, fpst, ahp); > tcg_gen_shli_i32(tmp2, tmp2, 16); > tcg_gen_or_i32(tmp2, tmp2, tmp); > neon_store_reg(rd, 1, tmp2); > tcg_temp_free_i32(tmp); > + tcg_temp_free_i32(ahp); > + tcg_temp_free_ptr(fpst); > break; > + } > case NEON_2RM_VCVT_F32_F16: > + { > + TCGv_ptr fpst; > + TCGv_i32 ahp; > if (!arm_dc_feature(s, ARM_FEATURE_VFP_FP16) || > q 
|| (rd & 1)) { > return 1; > } > + fpst = get_fpstatus_ptr(false); > + ahp = get_ahp_flag(); > tmp3 = tcg_temp_new_i32(); > tmp = neon_load_reg(rm, 0); > tmp2 = neon_load_reg(rm, 1); > tcg_gen_ext16u_i32(tmp3, tmp); > - gen_helper_neon_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env); > + gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp3, fpst, ahp); > tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 0)); > tcg_gen_shri_i32(tmp3, tmp, 16); > - gen_helper_neon_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env); > + gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp3, fpst, ahp); > tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 1)); > tcg_temp_free_i32(tmp); > tcg_gen_ext16u_i32(tmp3, tmp2); > - gen_helper_neon_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env); > + gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp3, fpst, ahp); > tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 2)); > tcg_gen_shri_i32(tmp3, tmp2, 16); > - gen_helper_neon_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env); > + gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp3, fpst, ahp); > tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 3)); > tcg_temp_free_i32(tmp2); > tcg_temp_free_i32(tmp3); > + tcg_temp_free_i32(ahp); > + tcg_temp_free_ptr(fpst); > break; > + } > case NEON_2RM_AESE: case NEON_2RM_AESMC: > if (!arm_dc_feature(s, ARM_FEATURE_V8_AES) > || ((rm | rd) & 1)) { > -- > 2.17.0 > thanks -- PMM