http://gcc.gnu.org/bugzilla/show_bug.cgi?id=54386
Bug #: 54386
Summary: Unaligned mem load wrongly generated for inlined
inline/static function
Classification: Unclassified
Product: gcc
Version: 4.8.0
Status: UNCONFIRMED
Severity: normal
Priority: P3
Component: middle-end
AssignedTo: [email protected]
ReportedBy: [email protected]
Target: sh*-*-*
I've tested this only on SH, but it might be a generic case.
The following code snippet...
/*
 * Test whether bit `nr` of the word pointed to by `a` is set.
 *
 * Returns 1 if the bit is set and 0 otherwise.
 *
 * NOTE: this is the reproducer for the report; keep the declaration as is.
 * According to the report, the byte-wise loads appear when this function is
 * declared `static`, `inline`, or `static inline`.
 */
static __inline__ int
__testbit (unsigned long nr, unsigned long* a)
{
/* The literal 1 has type int, so the mask is built as an int shift. */
return (*a & (1 << nr)) != 0;
}
/*
 * Minimal record used by the test case.
 *
 * `flags` follows the two-element pointer array `x`; the dumps in this report
 * access it at offset 8 from the start of the structure.
 */
typedef struct page2
{
void* x[2];
/* Word whose bit 7 is tested by activate_page3. */
unsigned long flags;
/* The two members below are cleared by activate_page3. */
void* a;
void* b;
} page2_t;
/*
 * Clear page->a and page->b when bit 7 of page->flags is not set.
 *
 * This is the function whose GIMPLE and RTL are shown in the report; the
 * call to __testbit is inlined here.
 */
void activate_page3(struct page2 * page)
{
/* Bit 7 corresponds to the `& 128` mask seen in the GIMPLE dump. */
if ( ! __testbit (7, &(page->flags)) )
{
page->a = 0;
page->b = 0;
}
}
...compiled with -O2 yields the following GIMPLE, which expands to this RTL:
void activate_page3(page2*) (struct page2 * page)
{
long unsigned int _4;
long unsigned int _7;
;; basic block 2, loop depth 0
;; pred: ENTRY
_4 = MEM[(long unsigned int *)page_2(D) + 8B];
_7 = _4 & 128;
if (_7 == 0)
goto <bb 3>;
else
goto <bb 4>;
;; succ: 3
;; 4
;; basic block 3, loop depth 0
;; pred: 2
page_2(D)->a = 0B;
page_2(D)->b = 0B;
;; succ: 4
;; basic block 4, loop depth 0
;; pred: 2
;; 3
return;
;; succ: EXIT
}
;; Generating RTL for gimple basic block 2
;; if (_7 == 0)
(insn 7 5 8 (set (reg:QI 165)
(mem:QI (plus:SI (reg/v/f:SI 162 [ page ])
(const_int 8 [0x8])) [0+0 S1 A8])) sh_tmp.cpp:699 -1
(nil))
(insn 8 7 9 (set (reg:SI 163)
(zero_extend:SI (reg:QI 165))) sh_tmp.cpp:699 -1
(nil))
(insn 9 8 11 (set (reg:SI 166)
(ashift:SI (reg:SI 163)
(const_int 24 [0x18]))) sh_tmp.cpp:699 -1
(nil))
(insn 11 9 12 (set (reg:QI 169)
(mem:QI (plus:SI (reg/v/f:SI 162 [ page ])
(const_int 9 [0x9])) [0+1 S1 A8])) sh_tmp.cpp:699 -1
(nil))
[...]
For some reason the SI mem access is converted to four QI mem accesses, which
looks like an unaligned load. Since only one QI part is needed, three of the QI
loads disappear after the combine pass has done its thing.
On the other hand, removing 'static' and 'inline' for the function __testbit,
like:
int
__testbit (unsigned long nr, unsigned long* a)
would still do the inlining (as expected) and expand to an (aligned) SI mem load:
void activate_page3(page2*) (struct page2 * page)
{
long unsigned int _6;
long unsigned int _7;
;; basic block 2, loop depth 0
;; pred: ENTRY
_6 = MEM[(long unsigned int *)page_2(D) + 8B];
_7 = _6 & 128;
if (_7 == 0)
goto <bb 3>;
else
goto <bb 4>;
;; succ: 3
;; 4
;; basic block 3, loop depth 0
;; pred: 2
page_2(D)->a = 0B;
page_2(D)->b = 0B;
;; succ: 4
;; basic block 4, loop depth 0
;; pred: 2
;; 3
return;
;; succ: EXIT
}
;; Generating RTL for gimple basic block 2
;; if (_7 == 0)
(insn 6 5 7 (set (reg:SI 164)
(mem:SI (plus:SI (reg/v/f:SI 162 [ page ])
(const_int 8 [0x8])) [2 MEM[(long unsigned int *)page_2(D) +
8B]+0 S4 A32])) swap.i:20 -1
(nil))
(insn 7 6 8 (set (reg:SI 163 [ D.1692 ])
(and:SI (reg:SI 164)
(const_int 128 [0x80]))) swap.i:20 -1
(nil))
The unaligned loads are generated if the __testbit function is
- 'static'
- 'inline'
- 'static inline'
sh-elf-gcc -v
Using built-in specs.
COLLECT_GCC=sh-elf-gcc
COLLECT_LTO_WRAPPER=/usr/local/libexec/gcc/sh-elf/4.8.0/lto-wrapper
Target: sh-elf
Configured with: ../gcc-trunk-van/configure --target=sh-elf --prefix=/usr/local
--enable-languages=c,c++ --enable-multilib --disable-libssp --disable-nls
--disable-werror --enable-lto --with-newlib --with-gnu-as --with-gnu-ld
--with-system-zlib
Thread model: single
gcc version 4.8.0 20120827 (experimental) (GCC)