Hi,

This patches set addresses the problem of inefficient usage of x86 addressing 
modes in PIC.  Looking into generated 32bit PIC assembler we may see lots of 
'leal  <symbol>@GOTOFF, %reg' instructions.  Almost in all cases this constant 
value may be propagated into following address operand, but it doesn't happen 
for various reasons.

I scanned assembler generated for SPEC2000 on O2 and Ofast + LTO to measure 
patches effect.  Here are results for the whole patches set:

Test         O2 ref     patched    Ofast + LTO ref   patched
164.gzip        12      0 (-100%)            39      0 (-100%)
175.vpr         0       0 (-0%)              4       0 (-100%)
176.gcc         141     6 (-96%)             294     10 (-97%)
181.mcf         4       0 (-100%)            4       2 (-50%)
186.crafty      28      0 (-100%)            277     16 (-95%)
197.parser      18      0 (-100%)            20      1 (-95%)
252.eon         593     0 (-100%)            410     0 (-100%)
253.perlbmk     2       0 (-100%)            29      0 (-100%)
254.gap         46      0 (-100%)            294     77 (-74%)
255.vortex      7       0 (-100%)            21      3 (-86%)
256.bzip2       7       0 (-100%)            15      0 (-100%)
300.twolf       0       0 (-0%)              1       13 (+1200%)
168.wupwise     0       0 (-0%)              0       0 (-0%)
171.swim        8       1 (-88%)             3       1 (-67%)
172.mgrid       0       0 (-0%)              3       0 (-100%)
173.applu       25      1 (-96%)             197     85 (-57%)
177.mesa        1       0 (-100%)            3       0 (-100%)
178.galgel      0       0 (-0%)              5       0 (-100%)
179.art         4       0 (-100%)            4       0 (-100%)
183.equake      5       0 (-100%)            4       0 (-100%)
187.facerec     38      0 (-100%)            32      10 (-69%)
188.ammp        2       0 (-100%)            2       0 (-100%)
189.lucas       15      0 (-100%)            11      0 (-100%)
191.fma3d       211     0 (-100%)            1480    172 (-89%)
200.sixtrack    677     93 (-87%)            1013    67 (-94%)
301.apsi        22      1 (-96%)             12      1 (-92%)

Performance results (Ofast + LTO) measured on Avoton server:

SPEC2000INT +0.81%
SPEC2000FP  +0.19%


The first patch fixes ix86_address_cost.  Previous change in this function was 
made after pseudo PIC register introduction and it tried to keep zero cost for 
PIC register.  Unfortunately it doesn't work for cases when PIC register is the 
only register used in address.  Thus addresses [pic_reg + const] and [pic_reg + 
reg] get the same cost 1 which doesn't match the goal and makes fwprop to 
reject propagation.

Bootstrapped and tested on x86_64-unknown-linux-gnu.  OK for trunk?

Thanks,
Ilya
--
gcc/

2015-03-10  Ilya Enkovich  <ilya.enkov...@intel.com>

        PR target/65103
        * gcc/config/i386/i386.c (ix86_address_cost): Fix cost of a PIC
        register.

gcc/testsuite/

2015-03-10  Ilya Enkovich  <ilya.enkov...@intel.com>

        PR target/65103
        * gcc.target/i386/pr65103-1.c: New.


diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index ab8f03a..06eacd0 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -12931,30 +12931,27 @@ ix86_address_cost (rtx x, machine_mode, addr_space_t, 
bool)
   if (parts.index && GET_CODE (parts.index) == SUBREG)
     parts.index = SUBREG_REG (parts.index);
 
-  /* Attempt to minimize number of registers in the address.  */
-  if ((parts.base
-       && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
-      || (parts.index
-         && (!REG_P (parts.index)
-             || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
-    cost++;
+  /* Attempt to minimize number of registers in the address.
 
-  /* When address base or index is "pic_offset_table_rtx" we don't increase
+     When address base or index is "pic_offset_table_rtx" we don't increase
      address cost.  When a memopt with "pic_offset_table_rtx" is not invariant
      itself it most likely means that base or index is not invariant.
      Therefore only "pic_offset_table_rtx" could be hoisted out, which is not
      profitable for x86.  */
   if (parts.base
-      && (current_pass->type == GIMPLE_PASS
-         || (!pic_offset_table_rtx
-             || REGNO (pic_offset_table_rtx) != REGNO(parts.base)))
       && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
-      && parts.index
       && (current_pass->type == GIMPLE_PASS
-         || (!pic_offset_table_rtx
-             || REGNO (pic_offset_table_rtx) != REGNO(parts.index)))
+         || !pic_offset_table_rtx
+         || !REG_P (parts.base)
+         || REGNO (pic_offset_table_rtx) != REGNO (parts.base)))
+    cost++;
+
+  if (parts.index
       && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
-      && parts.base != parts.index)
+      && (current_pass->type == GIMPLE_PASS
+         || !pic_offset_table_rtx
+         || !REG_P (parts.index)
+         || REGNO (pic_offset_table_rtx) != REGNO (parts.index)))
     cost++;
 
   /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
diff --git a/gcc/testsuite/gcc.target/i386/pr65103-1.c 
b/gcc/testsuite/gcc.target/i386/pr65103-1.c
new file mode 100644
index 0000000..a3284b8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr65103-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile { target ia32 } } */
+/* { dg-options "-m32 -O2 -fPIE" } */
+/* { dg-final { scan-assembler-not "GOTOFF," } } */
+
+typedef struct S
+{
+  int a;
+  int sum;
+  int delta;
+} S;
+
+S gs;
+int global_opt (int max)
+{
+  while (gs.sum < max)
+    gs.sum += gs.delta;
+  return gs.a;
+}

Reply via email to