Hello! One notable omission from x87 rounding sequences is plain round(), since x87 does not support round-to-nearest, away-from-zero directly. However, this function can be synthesized as round(a) = sgn(a) * floor(fabs(a) + 0.5). Fortunately, we have all building blocks ready in i386.md, we should just wire them correctly.
The x87 expansion is protected with flag_unsafe_math_optimizations, since the input range is reduced (true?) due to addition - OTOH, insn that calculates floor is protected by this flag anyway. 2011-08-08 Uros Bizjak <ubiz...@gmail.com> * config/i386/i386.c (ix86_emit_i387_round): New function. * config/i386/i386-protos.h (ix86_emit_i387_round): Declare. * config/i386/i386.md (round<mode>2): Use X87MODEF mode iterator. Use ix86_emit_i387_round to expand round function for i387 math. Patch was bootstrapped and regression tested on x86_64-pc-linux-gnu. It also computes correctly +-NaN, +-Inf and all inputs I have thrown in. Any comments? Uros.
Index: i386.md =================================================================== --- i386.md (revision 177572) +++ i386.md (working copy) @@ -14362,17 +14396,31 @@ }) (define_expand "round<mode>2" - [(match_operand:MODEF 0 "register_operand" "") - (match_operand:MODEF 1 "nonimmediate_operand" "")] - "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH - && !flag_trapping_math && !flag_rounding_math" + [(match_operand:X87MODEF 0 "register_operand" "") + (match_operand:X87MODEF 1 "nonimmediate_operand" "")] + "(TARGET_USE_FANCY_MATH_387 + && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations) + || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH + && !flag_trapping_math && !flag_rounding_math)" { if (optimize_insn_for_size_p ()) FAIL; - if (TARGET_64BIT || (<MODE>mode != DFmode)) - ix86_expand_round (operand0, operand1); + + if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH + && !flag_trapping_math && !flag_rounding_math) + { + if (TARGET_64BIT || (<MODE>mode != DFmode)) + ix86_expand_round (operands[0], operands[1]); + else + ix86_expand_rounddf_32 (operands[0], operands[1]); + } else - ix86_expand_rounddf_32 (operand0, operand1); + { + operands[1] = force_reg (<MODE>mode, operands[1]); + ix86_emit_i387_round (operands[0], operands[1]); + } DONE; }) Index: i386-protos.h =================================================================== --- i386-protos.h (revision 177572) +++ i386-protos.h (working copy) @@ -163,6 +163,7 @@ extern void x86_emit_floatuns (rtx [2]); extern void ix86_emit_fp_unordered_jump (rtx); extern void ix86_emit_i387_log1p (rtx, rtx); +extern void ix86_emit_i387_round (rtx, rtx); extern void ix86_emit_swdivsf (rtx, rtx, rtx, enum machine_mode); extern void ix86_emit_swsqrtsf (rtx, rtx, enum machine_mode, bool); Index: i386.c =================================================================== --- i386.c (revision 177572) +++ i386.c (working copy) @@ -31722,6 +31729,76 @@ void ix86_emit_i387_log1p (rtx op0, rtx op1) emit_label (label2); } +/* Emit code for round calculation. */ +void ix86_emit_i387_round (rtx op0, rtx op1) +{ + enum machine_mode mode = GET_MODE (op0); + rtx e1, e2, res, tmp0, tmp1, half; + rtx scratch = gen_reg_rtx (HImode); + rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG); + rtx jump_label = gen_label_rtx (); + rtx insn; + + e1 = gen_reg_rtx (mode); + e2 = gen_reg_rtx (mode); + res = gen_reg_rtx (mode); + + half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, mode); + + /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */ + + /* scratch = fxam(op1) */ + emit_insn (gen_rtx_SET (VOIDmode, scratch, + gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1), + UNSPEC_FXAM))); + /* e1 = fabs(op1) */ + emit_insn (gen_rtx_SET (VOIDmode, e1, + gen_rtx_ABS (mode, op1))); + + /* e2 = e1 + 0.5 */ + half = force_reg (mode, half); + emit_insn (gen_rtx_SET (VOIDmode, e2, + gen_rtx_PLUS (mode, e1, half))); + + /* res = floor(e2) */ + if (mode != XFmode) + { + tmp0 = gen_reg_rtx (XFmode); + tmp1 = gen_reg_rtx (XFmode); + + emit_insn (gen_rtx_SET (VOIDmode, tmp1, + gen_rtx_FLOAT_EXTEND (XFmode, e2))); + } + else + tmp0 = res, tmp1 = e2; + + emit_insn (gen_frndintxf2_floor (tmp0, tmp1)); + + if (mode != XFmode) + emit_insn (gen_rtx_SET (VOIDmode, res, + gen_rtx_UNSPEC (mode, gen_rtvec (1, tmp0), + UNSPEC_TRUNC_NOOP))); + /* flags = signbit(a) */ + emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x02))); + + /* if (flags) then res = -res */ + tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, + gen_rtx_EQ (VOIDmode, flags, const0_rtx), + gen_rtx_LABEL_REF (VOIDmode, jump_label), + pc_rtx); + insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0)); + predict_jump (REG_BR_PROB_BASE * 50 / 100); + JUMP_LABEL (insn) = jump_label; + + emit_insn (gen_rtx_SET (VOIDmode, res, + gen_rtx_NEG (mode, res))); + + emit_label (jump_label); + LABEL_NUSES (jump_label) = 1; + + emit_move_insn (op0, res); +} + /* Output code to perform a Newton-Rhapson approximation of a single precision floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */