https://gcc.gnu.org/bugzilla/show_bug.cgi?id=88714

--- Comment #16 from Jakub Jelinek <jakub at gcc dot gnu.org> ---
Some more progress.
I've used
--- gcc/combine.c.jj    2019-01-10 11:43:17.050333949 +0100
+++ gcc/combine.c       2019-01-15 14:47:28.009094300 +0100
@@ -2319,6 +2319,9 @@ contains_muldiv (rtx x)
     }
 }


+int cxcnt = -1;
+int cxcurcnt = 0;
+
 /* Determine whether INSN can be used in a combination.  Return nonzero if
    not.  This is used in try_combine to detect early some cases where we
    can't perform combinations.  */
@@ -2361,7 +2364,8 @@ cant_combine_insn_p (rtx_insn *insn)
 #endif
          || (HARD_REGISTER_P (dest)
              && ! TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dest))
-             && targetm.class_likely_spilled_p (REGNO_REG_CLASS (REGNO
(dest))))))
+             && (targetm.class_likely_spilled_p (REGNO_REG_CLASS (REGNO
(dest)))
+                 || (getenv ("COMBINE_FIRST") && cxcurcnt == cxcnt)))))
     return 1;

   return 0;
@@ -14993,6 +14997,12 @@ make_more_copies (void)
 {
   basic_block bb;

+  if (cxcnt == -1 && getenv ("COMBINE_CNT"))
+    cxcnt = atoi (getenv ("COMBINE_CNT"));
+  ++cxcurcnt;
+  if (getenv ("COMBINE_SECOND") && cxcurcnt == cxcnt)
+    return;
+
   FOR_EACH_BB_FN (bb, cfun)
     {
       rtx_insn *insn;

hack to undo both or either one of the two changes r265398 made, on the function
of my choice (initially, for binary search, I was using cxcurcnt >= cxcnt instead
of cxcurcnt == cxcnt in the two spots), and found that with
COMBINE_CNT=74 COMBINE_FIRST=1 COMBINE_SECOND=1
sort.i works as in stage1, so it is
_ZL21vn_reference_lookup_2P6ao_refP9tree_nodejPv that actually matters.
COMBINE_CNT=74 COMBINE_SECOND=1 generates the same (good) assembly as
COMBINE_CNT=74 COMBINE_FIRST=1 COMBINE_SECOND=1, while
COMBINE_CNT=74 COMBINE_FIRST=1 doesn't work the same as COMBINE_CNT=200.
The "bad" to "good" assembly difference is:
        .type   _ZL21vn_reference_lookup_2P6ao_refP9tree_nodejPv, %function
 _ZL21vn_reference_lookup_2P6ao_refP9tree_nodejPv:
        .fnstart
        @ args = 0, pretend = 0, frame = 8
        @ frame_needed = 0, uses_anonymous_args = 0
        movw    r0, #:lower16:global_options
-       mov     ip, r1
-       movt    r0, #:upper16:global_options
        push    {r4, r5, r6, lr}
        .save {r4, r5, r6, lr}
-       ldr     r0, [r0, #88]
+       movt    r0, #:upper16:global_options
+       mov     r5, r3
        .pad #8
        sub     sp, sp, #8
-       str     r3, [sp]
-       ldr     r1, [r0, #540]
-       cmp     r1, r2
+       ldr     r3, [r0, #88]
+       str     r5, [sp]
+       ldr     r3, [r3, #540]
+       cmp     r3, r2
        bcc     .L2103
-       movw    r5, #:lower16:.LANCHOR1
-       mov     r4, r3
-       movt    r5, #:upper16:.LANCHOR1
-       ldr     r3, [r5, #176]
+       movw    r4, #:lower16:.LANCHOR1
+       mov     ip, r1
+       movt    r4, #:upper16:.LANCHOR1
+       ldr     r3, [r4, #176]
        cmp     r3, #0
-       strne   ip, [r3]
-       ldr     r3, [r4, #12]
+       strne   r1, [r3]
+       ldr     r3, [r5, #12]
        cmp     r3, #0
        ldrne   r2, [r3, #4]
-       ldrne   r3, [r4, #8]
+       ldrne   r3, [r5, #8]
        subne   r3, r3, r2
-       strne   r3, [r4, #8]
-       cmp     ip, #0
+       strne   r3, [r5, #8]
+       cmp     r1, #0
        beq     .L2104
-       ldr     r6, [r5, #12]
+       ldr     r6, [r4, #12]
        b       .L2101
 .L2127:
-       ldr     ip, [r2, #4]
+       ldr     ip, [r3, #4]
 .L2099:
-       ldr     r3, [r5, #8]
+       ldr     r3, [r4, #8]
        cmp     r3, ip
        beq     .L2125
        ldrb    r3, [ip, #3]    @ zero_extendqisi2
        tst     r3, #2
        beq     .L2126
 .L2101:
        ldr     r2, [ip, #4]
        add     r1, sp, #4
        mov     r0, r6
        str     ip, [sp, #4]
        bl     
_ZN10hash_tableI17vn_ssa_aux_hasher11xcallocatorE14find_with_hashERKP9tree_nodej
-       ldr     r2, [r0]
-       cmp     r2, #0
+       ldr     r3, [r0]
+       cmp     r3, #0
        beq     .L2098
-       ldrb    r3, [r2, #16]   @ zero_extendqisi2
-       tst     r3, #1
+       ldrb    r2, [r3, #16]   @ zero_extendqisi2
+       tst     r2, #1
        bne     .L2127
 .L2098:
        ldr     ip, [sp, #4]
        b       .L2099
 .L2126:
-       ldr     r1, [sp]
+       ldr     r3, [sp]
 .L2097:
-       ldrd    r2, [r1, #8]
-       str     ip, [r4, #12]
-       ldr     r0, [r5, #28]
-       cmp     r3, #0
-       ldrne   r3, [r3, #4]
+       str     ip, [r5, #12]
+       ldr     r1, [r3, #12]
+       ldr     r2, [r3, #8]
+       ldr     r0, [r4, #28]
+       cmp     r1, #0
+       ldrne   r1, [r1, #4]
        ldr     r0, [r0, #8]
-       addne   r2, r2, r3
-       mov     r3, #0
-       strne   r2, [r1, #8]
+       addne   r2, r2, r1
        mov     r1, sp
+       strne   r2, [r3, #8]
+       mov     r3, #0
        bl     
_ZN10hash_tableI19vn_reference_hasher11xcallocatorE19find_slot_with_hashERKP14vn_reference_sj13insert_option
        cmp     r0, #0
        ldrne   r0, [r0]
 .L2093:
        add     sp, sp, #8
        @ sp needed
        pop     {r4, r5, r6, pc}
 .L2103:
        mvn     r0, #0
        add     sp, sp, #8
        @ sp needed
        pop     {r4, r5, r6, pc}
 .L2104:
-       mov     r1, r4
+       mov     r3, r5
        b       .L2097
 .L2125:
        movw    r2, #:lower16:.LC42
        movw    r0, #:lower16:.LC3
        movt    r2, #:upper16:.LC42
        movt    r0, #:upper16:.LC3
        movw    r1, #481
        bl      _Z11fancy_abortPKciS0_
        .fnend
        .size   _ZL21vn_reference_lookup_2P6ao_refP9tree_nodejPv,
.-_ZL21vn_reference_lookup_2P6ao_refP9tree_nodejPv

Reply via email to