Hi! As reported by Uros, the optimization that turns "fld a; fld b; fxch %st(1)" into "fld b; fld a" misses two important cases: one is FLOAT_EXTEND memory loads, where the memory is SFmode or DFmode but we extend it to a wider mode, and the other is when we load a known i?87 constant like 0.0, 1.0, PI etc.
Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? 2017-02-21 Jakub Jelinek <ja...@redhat.com> PR target/70465 * reg-stack.c (emit_swap_insn): Treat (float_extend:?F (mem:?F)) and (const_double:?F) like (mem:?F) for the purpose of fxch %st(1) elimination by swapping fld*. * gcc.target/i386/pr70465-2.c: New test. --- gcc/reg-stack.c.jj 2017-01-26 09:46:03.000000000 +0100 +++ gcc/reg-stack.c 2017-02-21 16:22:45.208895719 +0100 @@ -895,12 +895,16 @@ emit_swap_insn (rtx_insn *insn, stack_pt just use fld b fld a - if possible. */ + if possible. Similarly for fld1, fldz, fldpi etc. instead of any + of the loads or for float extension from memory. */ + i1src = SET_SRC (i1set); + if (GET_CODE (i1src) == FLOAT_EXTEND) + i1src = XEXP (i1src, 0); if (REG_P (i1dest) && REGNO (i1dest) == FIRST_STACK_REG - && MEM_P (SET_SRC (i1set)) - && !side_effects_p (SET_SRC (i1set)) + && (MEM_P (i1src) || GET_CODE (i1src) == CONST_DOUBLE) + && !side_effects_p (i1src) && hard_regno == FIRST_STACK_REG + 1 && i1 != BB_HEAD (current_block)) { @@ -930,6 +934,9 @@ emit_swap_insn (rtx_insn *insn, stack_pt && (i2set = single_set (i2)) != NULL_RTX) { rtx i2dest = *get_true_reg (&SET_DEST (i2set)); + rtx i2src = SET_SRC (i2set); + if (GET_CODE (i2src) == FLOAT_EXTEND) + i2src = XEXP (i2src, 0); /* If the last two insns before insn that involve stack regs are loads, where the latter (i1) pushes onto the register stack and thus @@ -937,9 +944,9 @@ emit_swap_insn (rtx_insn *insn, stack_pt %st to %st(1), consider swapping them. */ if (REG_P (i2dest) && REGNO (i2dest) == FIRST_STACK_REG - && MEM_P (SET_SRC (i2set)) + && (MEM_P (i2src) || GET_CODE (i2src) == CONST_DOUBLE) /* Ensure i2 doesn't have other side-effects. */ - && !side_effects_p (SET_SRC (i2set)) + && !side_effects_p (i2src) /* And that the two instructions can actually be swapped, i.e. 
there shouldn't be any stores in between i2 and i1 that might alias with --- gcc/testsuite/gcc.target/i386/pr70465-2.c.jj 2017-02-21 16:23:46.982090658 +0100 +++ gcc/testsuite/gcc.target/i386/pr70465-2.c 2017-02-21 16:16:25.000000000 +0100 @@ -0,0 +1,25 @@ +/* PR target/70465 */ +/* { dg-do compile } */ +/* { dg-options "-Ofast -mfpmath=387 -fomit-frame-pointer" } */ +/* { dg-final { scan-assembler-not "fxch\t%st.1" } } */ + +extern float d[1024]; + +static inline long double +foo (long double a, long double b) +{ + return a < b ? a : b; +} + +static inline long double +bar (long double a, long double b) +{ + return a > b ? a : b; +} + +float +baz (void) +{ + long double c = d[0]; + return foo (bar (c, 0.0l), 1.0l); +} Jakub