http://gcc.gnu.org/bugzilla/show_bug.cgi?id=55906



             Bug #: 55906

           Summary: suboptimal code generated for post-inc on Thumb1

    Classification: Unclassified

           Product: gcc

           Version: 4.8.0

            Status: UNCONFIRMED

          Severity: normal

          Priority: P3

         Component: tree-optimization

        AssignedTo: unassig...@gcc.gnu.org

        ReportedBy: amker.ch...@gmail.com





For the program below:



int

ffs(int word)



{

  int i;



  if (!word)

    return 0;



  i = 0;

  for (;;)

    {

      if (((1 << i++) & word) != 0)

 return i;

    }

}



The 164t.optimized dump looks like this:

ffs (int word)

{

  int i;

  int _6;

  int _7;



  <bb 2>:

  if (word_3(D) == 0)

    goto <bb 6>;

  else

    goto <bb 3>;



  <bb 3>:



  <bb 4>:

  # i_1 = PHI <0(3), i_5(5)>

  i_5 = i_1 + 1;

  _6 = word_3(D) >> i_1;

  _7 = _6 & 1;

  if (_7 != 0)

    goto <bb 6>;

  else

    goto <bb 5>;



  <bb 5>:

  goto <bb 4>;



  <bb 6>:

  # i_2 = PHI <0(2), i_5(4)>

  return i_2;



}

GCC increments i before i_1 is used, so i_1 and i_5 are live at the same time

and are assigned to different partitions, as shown in the expanded RTL:

    2: r115:SI=r0:SI

    3: NOTE_INSN_FUNCTION_BEG

    9: pc={(r115:SI==0)?L33:pc}

      REG_BR_PROB 0xf3c

   10: NOTE_INSN_BASIC_BLOCK 4

    4: r110:SI=0

   18: L18:

   11: NOTE_INSN_BASIC_BLOCK 5

   12: r111:SI=r110:SI+0x1        <-----i_5/i_1 in different pseudos

   13: r116:SI=r115:SI>>r110:SI

   14: r118:SI=0x1

   15: r117:SI=r116:SI&r118:SI

      REG_EQUAL r116:SI&0x1

   16: pc={(r117:SI!=0)?L21:pc}

      REG_BR_PROB 0x384

   17: NOTE_INSN_BASIC_BLOCK 6

    5: r110:SI=r111:SI

   19: pc=L18

   20: barrier

   33: L33:

   32: NOTE_INSN_BASIC_BLOCK 7

    6: r111:SI=0

   21: L21:

   22: NOTE_INSN_BASIC_BLOCK 8

   23: r114:SI=r111:SI

   27: r0:SI=r114:SI

   30: use r0:SI



Finally, suboptimal code is generated:

ffs:

    mov    r3, #0

    push    {r4, lr}

    cmp    r0, r3

    beq    .L2

    mov    r2, r3

    mov    r1, #1

.L3:

    mov    r4, r0

    asr    r4, r4, r2

    add    r3, r2, #1

    tst    r4, r1

    bne    .L2

    mov    r2, r3

    b    .L3

.L2:

    mov    r0, r3

    @ sp needed

    pop    {r4, pc}



GCC 4.6, by contrast, generates better code:

ffs:

    push    {lr}

    sub    r3, r0, #0

    beq    .L2

    mov    r3, #0

    mov    r2, #1

.L3:

    mov    r1, r0

    asr    r1, r1, r3

    add    r3, r3, #1

    tst    r1, r2

    beq    .L3

.L2:

    mov    r0, r3

    @ sp needed for prologue

    pop    {pc}





The command line is:

arm-none-eabi-gcc -mthumb -mcpu=cortex-m0 -Os -S ffs.c -o ffs.S



The same problem exists when optimizing with "-O2".

Reply via email to