On Tue, Oct 4, 2011 at 6:06 PM, H.J. Lu <hjl.to...@gmail.com> wrote: >> OTOH, x86_64 and i686 targets can also benefit from this change. If >> combine can't create more complex address (covered by lea), then it >> will simply propagate memory operand back into the add insn. It looks >> to me that we can't loose here, so: >> >> /* Improve address combine. */ >> if (code == PLUS && MEM_P (src2)) >> src2 = force_reg (mode, src2); >> >> Any opinions? >> > > It doesn't work with 64bit libstdc++:
Yeah, yeah. ix86_output_mi_thunk has some ... issues. Please try attached patch that introduces ix86_emit_binop and uses it in a bunch of places. Uros.
Index: i386-protos.h =================================================================== --- i386-protos.h (revision 179506) +++ i386-protos.h (working copy) @@ -94,6 +94,7 @@ extern bool ix86_lea_outperforms (rtx, unsigned in unsigned int, unsigned int); extern bool ix86_avoid_lea_for_add (rtx, rtx[]); extern bool ix86_avoid_lea_for_addr (rtx, rtx[]); +extern void ix86_emit_binop (enum rtx_code, enum machine_mode, rtx, rtx); extern void ix86_split_lea_for_addr (rtx[], enum machine_mode); extern bool ix86_lea_for_add_ok (rtx, rtx[]); extern bool ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high); Index: i386.c =================================================================== --- i386.c (revision 179506) +++ i386.c (working copy) @@ -15727,6 +15727,10 @@ ix86_fixup_binary_operands (enum rtx_code code, en if (MEM_P (src1) && !rtx_equal_p (dst, src1)) src1 = force_reg (mode, src1); + /* Improve address combine. */ + if (code == PLUS && MEM_P (src2)) + src2 = force_reg (mode, src2); + operands[1] = src1; operands[2] = src2; return dst; @@ -16470,6 +16474,20 @@ ix86_avoid_lea_for_addr (rtx insn, rtx operands[]) return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost); } +/* Emit x86 binary operand CODE in mode MODE, where the first operand + matches destination. RTX includes clobber of FLAGS_REG. */ + +extern void ix86_emit_binop (enum rtx_code code, enum machine_mode mode, + rtx dst, rtx src) +{ + rtx op, clob; + + op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, dst, src)); + clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG)); + + emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob))); +} + /* Split lea instructions into a sequence of instructions which are executed on ALU to avoid AGU stalls. It is assumed that it is allowed to clobber flags register @@ -16482,8 +16500,7 @@ ix86_split_lea_for_addr (rtx operands[], enum mach unsigned int regno1 = INVALID_REGNUM; unsigned int regno2 = INVALID_REGNUM; struct ix86_address parts; - rtx tmp, clob; - rtvec par; + rtx tmp; int ok, adds; ok = ix86_decompose_address (operands[1], &parts); @@ -16515,14 +16532,7 @@ ix86_split_lea_for_addr (rtx operands[], enum mach gcc_assert (regno2 != regno0); for (adds = parts.scale; adds > 0; adds--) - { - tmp = gen_rtx_PLUS (mode, operands[0], parts.index); - tmp = gen_rtx_SET (VOIDmode, operands[0], tmp); - clob = gen_rtx_CLOBBER (VOIDmode, - gen_rtx_REG (CCmode, FLAGS_REG)); - par = gen_rtvec (2, tmp, clob); - emit_insn (gen_rtx_PARALLEL (VOIDmode, par)); - } + ix86_emit_binop (PLUS, mode, operands[0], parts.index); } else { @@ -16531,30 +16541,14 @@ ix86_split_lea_for_addr (rtx operands[], enum mach emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.index)); /* Use shift for scaling. */ - tmp = gen_rtx_ASHIFT (mode, operands[0], - GEN_INT (exact_log2 (parts.scale))); - tmp = gen_rtx_SET (VOIDmode, operands[0], tmp); - clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG)); - par = gen_rtvec (2, tmp, clob); - emit_insn (gen_rtx_PARALLEL (VOIDmode, par)); + ix86_emit_binop (ASHIFT, mode, operands[0], + GEN_INT (exact_log2 (parts.scale))); if (parts.base) - { - tmp = gen_rtx_PLUS (mode, operands[0], parts.base); - tmp = gen_rtx_SET (VOIDmode, operands[0], tmp); - clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG)); - par = gen_rtvec (2, tmp, clob); - emit_insn (gen_rtx_PARALLEL (VOIDmode, par)); - } + ix86_emit_binop (PLUS, mode, operands[0], parts.base); if (parts.disp && parts.disp != const0_rtx) - { - tmp = gen_rtx_PLUS (mode, operands[0], parts.disp); - tmp = gen_rtx_SET (VOIDmode, operands[0], tmp); - clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG)); - par = gen_rtvec (2, tmp, clob); - emit_insn (gen_rtx_PARALLEL (VOIDmode, par)); - } + ix86_emit_binop (PLUS, mode, operands[0], parts.disp); } } else if (!parts.base && !parts.index) @@ -16565,41 +16559,32 @@ ix86_split_lea_for_addr (rtx operands[], enum mach else { if (!parts.base) - { - if (regno0 != regno2) - emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.index)); - } + { + if (regno0 != regno2) + emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.index)); + } else if (!parts.index) - { - if (regno0 != regno1) - emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.base)); - } - else - { - if (regno0 == regno1) - tmp = gen_rtx_PLUS (mode, operands[0], parts.index); - else if (regno0 == regno2) - tmp = gen_rtx_PLUS (mode, operands[0], parts.base); - else - { + { + if (regno0 != regno1) emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.base)); - tmp = gen_rtx_PLUS (mode, operands[0], parts.index); - } + } + else + { + if (regno0 == regno1) + tmp = parts.index; + else if (regno0 == regno2) + tmp = parts.base; + else + { + emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.base)); + tmp = parts.index; + } - tmp = gen_rtx_SET (VOIDmode, operands[0], tmp); - clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG)); - par = gen_rtvec (2, tmp, clob); - emit_insn (gen_rtx_PARALLEL (VOIDmode, par)); - } + ix86_emit_binop (PLUS, mode, operands[0], tmp); + } if (parts.disp && parts.disp != const0_rtx) - { - tmp = gen_rtx_PLUS (mode, operands[0], parts.disp); - tmp = gen_rtx_SET (VOIDmode, operands[0], tmp); - clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG)); - par = gen_rtvec (2, tmp, clob); - emit_insn (gen_rtx_PARALLEL (VOIDmode, par)); - } + ix86_emit_binop (PLUS, mode, operands[0], parts.disp); } } @@ -30940,7 +30925,7 @@ x86_output_mi_thunk (FILE *file, } } - emit_insn (ix86_gen_add3 (delta_dst, delta_dst, delta_rtx)); + ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx); } /* Adjust the this parameter by a value stored in the vtable. */ @@ -30983,7 +30968,7 @@ x86_output_mi_thunk (FILE *file, REGNO (this_reg)), vcall_mem)); else - emit_insn (ix86_gen_add3 (this_reg, this_reg, vcall_mem)); + ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem); } /* If necessary, drop THIS back to its stack slot. */