https://gcc.gnu.org/bugzilla/show_bug.cgi?id=68636

            Bug ID: 68636
           Summary: unnecessary unaligned load on mips o32
           Product: gcc
           Version: 5.2.1
            Status: UNCONFIRMED
          Severity: trivial
          Priority: P3
         Component: rtl-optimization
          Assignee: unassigned at gcc dot gnu.org
          Reporter: jan.sm...@alcatel-lucent.com
  Target Milestone: ---

/*
 * Reduced reproducer: load one 32-bit word from a fixed address and store it
 * into the volatile global `x`.
 *
 * The load address is 0x00800070 OR'ed with a base of 0xA0000000, to which
 * 0x20000000 is added when the inline assembly below yields zero.  The
 * address is therefore either 0xA0800070 or 0xC0800070; both are 4-byte
 * aligned, so a single aligned word load is sufficient.
 *
 * The redundant casts and parentheses and the GNU statement expression
 * inside the condition are kept exactly as reported (presumably the result
 * of macro expansion): the code generation in question depends on this
 * expression shape, so do not simplify it.
 */
typedef unsigned long uintptr_t;

volatile unsigned int x;

void testTestTest(void)
{
        x = *((volatile unsigned int *) (((volatile void *)
                ((uintptr_t)(0x00800070) |
                 ((0xA0000000) +
                  /* The address comparison is a compile-time constant (true),
                   * so the run-time choice rests on the asm result alone. */
                  (((((uintptr_t)(0x00800070)) >= 0x00800000) &&
                    ( {
                        register int zeroIsCavium;
                        /*
                         * Read coprocessor-0 register 15, keep bits 23..16
                         * and XOR them with 0x0d: the result is zero exactly
                         * when that byte equals 0x0d.
                         * NOTE(review): presumably the PRId company-ID field,
                         * with 0x0d meaning Cavium as the variable name
                         * suggests -- confirm against the MIPS manual.
                         */
                        asm volatile (
                                "   .set    push            \n"
                                "   .set    noreorder       \n"
                                "   mfc0    %0,$15          \n"
                                "   srl     %0,%0,16        \n"
                                "   andi    %0,%0,0xff      \n"
                                "   xori    %0,%0,0x0d      \n"
                                "   .set    pop             \n"
                                : "=r" (zeroIsCavium));
                        (zeroIsCavium == 0);
                    } ))
                   ? 0x20000000 : 0))))));
}


GCC 4.8 (-march=mips2 -O2 -mabi=o32) generates:

00000000 <testTestTest>:
   0:   40027800        mfc0    v0,$15
   4:   00021402        srl     v0,v0,0x10
   8:   304200ff        andi    v0,v0,0xff
   c:   3842000d        xori    v0,v0,0xd
  10:   10400006        beqz    v0,2c <testTestTest+0x2c>
  14:   3c02a080        lui     v0,0xa080
  18:   24420070        addiu   v0,v0,112
  1c:   8c430000        lw      v1,0(v0)
  20:   3c020000        lui     v0,0x0
  24:   03e00008        jr      ra
  28:   ac430000        sw      v1,0(v0)
  2c:   3c02c080        lui     v0,0xc080
  30:   24420070        addiu   v0,v0,112
  34:   8c430000        lw      v1,0(v0)
  38:   3c020000        lui     v0,0x0
  3c:   03e00008        jr      ra
  40:   ac430000        sw      v1,0(v0)

GCC 5.2.1 more or less replaces the LW with LWL+LWR:

00000000 <testTestTest>:
   0:   40027800        mfc0    v0,$15
   4:   00021402        srl     v0,v0,0x10
   8:   304200ff        andi    v0,v0,0xff
   c:   3842000d        xori    v0,v0,0xd
  10:   10400008        beqz    v0,34 <testTestTest+0x34>
  14:   3c02a080        lui     v0,0xa080
  18:   24420070        addiu   v0,v0,112
  1c:   88430000        lwl     v1,0(v0)
  20:   98430003        lwr     v1,3(v0)
  24:   00601021        move    v0,v1
  28:   3c030000        lui     v1,0x0
  2c:   03e00008        jr      ra
  30:   ac620000        sw      v0,0(v1)
  34:   3c02c080        lui     v0,0xc080
  38:   24420070        addiu   v0,v0,112
  3c:   88430000        lwl     v1,0(v0)
  40:   98430003        lwr     v1,3(v0)
  44:   00601021        move    v0,v1
  48:   3c030000        lui     v1,0x0
  4c:   03e00008        jr      ra
  50:   ac620000        sw      v0,0(v1)

Taking just the true branch of the conditional (i.e. dropping the inline
assembly), and thus further reducing the test case to something like

x = *((volatile unsigned int *) (((volatile void *)((uintptr_t)(0x00800070) |
((0xA0000000) + 0x20000000) ))));

results in exactly the same code in both 4.8 and 5.2.1:

00000000 <testTestTest>:
   0:   3c02c080        lui     v0,0xc080
   4:   8c430070        lw      v1,112(v0)
   8:   3c020000        lui     v0,0x0
   c:   ac430000        sw      v1,0(v0)
  10:   03e00008        jr      ra
  14:   00000000        nop


Is there a specific reason GCC starts using unaligned accesses when the inline
assembly is included? The variable term added to the address is either
0x20000000 or 0x0, and both result in aligned addresses.

Thanks

Reply via email to