On Tue, Oct 4, 2011 at 6:06 PM, H.J. Lu <hjl.to...@gmail.com> wrote:

>> OTOH, x86_64 and i686 targets can also benefit from this change. If
>> combine can't create a more complex address (covered by lea), then it
>> will simply propagate the memory operand back into the add insn. It looks
>> to me that we can't lose here, so:
>>
>>  /* Improve address combine.  */
>>  if (code == PLUS && MEM_P (src2))
>>    src2 = force_reg (mode, src2);
>>
>> Any opinions?
>>
>
> It doesn't work with 64bit libstdc++:

Yeah, yeah. x86_output_mi_thunk has some ...  issues.

Please try the attached patch, which introduces ix86_emit_binop and uses
it in a bunch of places.

Uros.
Index: i386-protos.h
===================================================================
--- i386-protos.h       (revision 179506)
+++ i386-protos.h       (working copy)
@@ -94,6 +94,7 @@ extern bool ix86_lea_outperforms (rtx, unsigned in
                                  unsigned int, unsigned int);
 extern bool ix86_avoid_lea_for_add (rtx, rtx[]);
 extern bool ix86_avoid_lea_for_addr (rtx, rtx[]);
+extern void ix86_emit_binop (enum rtx_code, enum machine_mode, rtx, rtx);
 extern void ix86_split_lea_for_addr (rtx[], enum machine_mode);
 extern bool ix86_lea_for_add_ok (rtx, rtx[]);
 extern bool ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high);
Index: i386.c
===================================================================
--- i386.c      (revision 179506)
+++ i386.c      (working copy)
@@ -15727,6 +15727,10 @@ ix86_fixup_binary_operands (enum rtx_code code, en
   if (MEM_P (src1) && !rtx_equal_p (dst, src1))
     src1 = force_reg (mode, src1);
 
+  /* Improve address combine.  */
+  if (code == PLUS && MEM_P (src2))
+    src2 = force_reg (mode, src2);
+
   operands[1] = src1;
   operands[2] = src2;
   return dst;
@@ -16470,6 +16474,20 @@ ix86_avoid_lea_for_addr (rtx insn, rtx operands[])
   return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost);
 }
 
+/* Emit x86 binary operation CODE in mode MODE, where the first operand
+   matches the destination.  The emitted RTX includes a clobber of FLAGS_REG.  */
+
+extern void ix86_emit_binop (enum rtx_code code, enum machine_mode mode,
+                            rtx dst, rtx src)
+{
+  rtx op, clob;
+
+  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, dst, src));
+  clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
+  
+  emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
+}
+
 /* Split lea instructions into a sequence of instructions
    which are executed on ALU to avoid AGU stalls.
    It is assumed that it is allowed to clobber flags register
@@ -16482,8 +16500,7 @@ ix86_split_lea_for_addr (rtx operands[], enum mach
   unsigned int regno1 = INVALID_REGNUM;
   unsigned int regno2 = INVALID_REGNUM;
   struct ix86_address parts;
-  rtx tmp, clob;
-  rtvec par;
+  rtx tmp;
   int ok, adds;
 
   ok = ix86_decompose_address (operands[1], &parts);
@@ -16515,14 +16532,7 @@ ix86_split_lea_for_addr (rtx operands[], enum mach
          gcc_assert (regno2 != regno0);
 
          for (adds = parts.scale; adds > 0; adds--)
-           {
-             tmp = gen_rtx_PLUS (mode, operands[0], parts.index);
-             tmp = gen_rtx_SET (VOIDmode, operands[0], tmp);
-             clob = gen_rtx_CLOBBER (VOIDmode,
-                                     gen_rtx_REG (CCmode, FLAGS_REG));
-             par = gen_rtvec (2, tmp, clob);
-             emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
-           }
+           ix86_emit_binop (PLUS, mode, operands[0], parts.index);
        }
       else
        {
@@ -16531,30 +16541,14 @@ ix86_split_lea_for_addr (rtx operands[], enum mach
            emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.index));
 
          /* Use shift for scaling.  */
-         tmp = gen_rtx_ASHIFT (mode, operands[0],
-                               GEN_INT (exact_log2 (parts.scale)));
-         tmp = gen_rtx_SET (VOIDmode, operands[0], tmp);
-         clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
-         par = gen_rtvec (2, tmp, clob);
-         emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
+         ix86_emit_binop (ASHIFT, mode, operands[0],
+                          GEN_INT (exact_log2 (parts.scale)));
 
          if (parts.base)
-           {
-             tmp = gen_rtx_PLUS (mode, operands[0], parts.base);
-             tmp = gen_rtx_SET (VOIDmode, operands[0], tmp);
-             clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
-             par = gen_rtvec (2, tmp, clob);
-             emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
-           }
+           ix86_emit_binop (PLUS, mode, operands[0], parts.base);
 
          if (parts.disp && parts.disp != const0_rtx)
-           {
-             tmp = gen_rtx_PLUS (mode, operands[0], parts.disp);
-             tmp = gen_rtx_SET (VOIDmode, operands[0], tmp);
-             clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
-             par = gen_rtvec (2, tmp, clob);
-             emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
-           }
+           ix86_emit_binop (PLUS, mode, operands[0], parts.disp);
        }
     }
   else if (!parts.base && !parts.index)
@@ -16565,41 +16559,32 @@ ix86_split_lea_for_addr (rtx operands[], enum mach
   else
     {
       if (!parts.base)
-      {
-        if (regno0 != regno2)
-         emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.index));
-      }
+       {
+         if (regno0 != regno2)
+           emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.index));
+       }
       else if (!parts.index)
-      {
-        if (regno0 != regno1)
-          emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.base));
-      }
-      else
-      {
-       if (regno0 == regno1)
-         tmp = gen_rtx_PLUS (mode, operands[0], parts.index);
-       else if (regno0 == regno2)
-         tmp = gen_rtx_PLUS (mode, operands[0], parts.base);
-       else
-         {
+       {
+         if (regno0 != regno1)
            emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.base));
-           tmp = gen_rtx_PLUS (mode, operands[0], parts.index);
-         }
+       }
+      else
+       {
+         if (regno0 == regno1)
+           tmp = parts.index;
+         else if (regno0 == regno2)
+           tmp = parts.base;
+         else
+           {
+             emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.base));
+             tmp = parts.index;
+           }
 
-        tmp = gen_rtx_SET (VOIDmode, operands[0], tmp);
-       clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
-       par = gen_rtvec (2, tmp, clob);
-       emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
-      }
+         ix86_emit_binop (PLUS, mode, operands[0], tmp);
+       }
 
       if (parts.disp && parts.disp != const0_rtx)
-      {
-        tmp = gen_rtx_PLUS (mode, operands[0], parts.disp);
-        tmp = gen_rtx_SET (VOIDmode, operands[0], tmp);
-       clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
-       par = gen_rtvec (2, tmp, clob);
-       emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
-      }
+       ix86_emit_binop (PLUS, mode, operands[0], parts.disp);
     }
 }
 
@@ -30940,7 +30925,7 @@ x86_output_mi_thunk (FILE *file,
            }
        }
 
-      emit_insn (ix86_gen_add3 (delta_dst, delta_dst, delta_rtx));
+      ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
     }
 
   /* Adjust the this parameter by a value stored in the vtable.  */
@@ -30983,7 +30968,7 @@ x86_output_mi_thunk (FILE *file,
                                                  REGNO (this_reg)),
                                     vcall_mem));
       else
-       emit_insn (ix86_gen_add3 (this_reg, this_reg, vcall_mem));
+       ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
     }
 
   /* If necessary, drop THIS back to its stack slot.  */

Reply via email to