The function below, compiled with -O9 -fomit-frame-pointer, fails to optimize
the final 16-bit swap into an x86 "roll" instruction *if* the return type is
uint64_t. If the return type is uint32_t, a roll instruction is generated, and
the resulting code is 6 instructions (19 bytes) shorter. All of the math in
the function is done in 32-bit unsigned values, so ideally the only effect of
setting the return type to uint64_t should be that the compiler arranges for
%edx to be zero at the end.
/* Local definitions of the fixed-width type names used by the test case
   (no <stdint.h> include is visible in it).
   NOTE(review): uint32_t is spelled `unsigned long`, which is assumed to be
   32 bits wide, as on the i686-pc-linux-gnu target this report names; on a
   target with a 64-bit `unsigned long` the name would be misleading.  */
typedef unsigned long long uint64_t;
typedef unsigned long uint32_t;
/* REV32STEP (VAR, SHIFT, MASK): one swap stage of a 32-bit bit reversal.
   The bits of VAR selected by MASK after a right shift by SHIFT are ORed
   with the bits selected by the 32-bit complement of MASK after a left
   shift by SHIFT, and the result is stored back into VAR.

   VAR must be a modifiable lvalue; it is evaluated three times, so it must
   not have side effects.  Every argument is parenthesized so that compound
   expressions (for example a MASK built with `|`) expand with the intended
   precedence.  For plain identifiers and literals the expansion is
   equivalent to the unparenthesized form.  */
#define REV32STEP(VAR, SHIFT, MASK) \
  ((VAR) = ((((VAR) >> (SHIFT)) & (MASK)) \
            | (((VAR) << (SHIFT)) & (0xFFFFFFFFUL & ~(MASK)))))
/* Reverse the bit order of N by swapping progressively larger groups:
   single bits, bit pairs, nibbles, bytes and finally the two halfwords.
   All arithmetic is carried out on a uint32_t; the value is only widened
   on return.  The uint64_t return type is deliberate: it is what triggers
   the missed rotate optimization this report describes.  */
uint64_t
bitreverse2 (uint32_t n)
{
  uint32_t v = n;

  REV32STEP (v, 1, 0x55555555UL);   /* swap odd and even bits */
  REV32STEP (v, 2, 0x33333333UL);   /* swap adjacent bit pairs */
  REV32STEP (v, 4, 0x0F0F0F0FUL);   /* swap adjacent nibbles */
  REV32STEP (v, 8, 0x00FF00FFUL);   /* swap adjacent bytes */
  REV32STEP (v, 16, 0x0000FFFFUL);  /* swap the two halfwords */

  return v;
}
64-bit version (return type uint64_t):
# Compiler output for the variant of bitreverse2 that returns uint64_t.
# Each of the first four stages is a shift pair, a mask pair and an OR.
# The immediates are the REV32STEP masks and their 32-bit complements,
# printed as signed decimals (hex values are given per stage below).
bitreverse2:
# Fetch the argument n from the stack.
movl 4(%esp), %edx
# Stage 1, odd/even bits: masks 0x55555555 (1431655765) and
# 0xAAAAAAAA (-1431655766).  addl %edx, %edx is the left shift by 1.
movl %edx, %eax
addl %edx, %edx
shrl %eax
andl $-1431655766, %edx
andl $1431655765, %eax
orl %edx, %eax
# Stage 2, bit pairs: masks 0x33333333 (858993459) and
# 0xCCCCCCCC (-858993460).
movl %eax, %edx
shrl $2, %edx
sall $2, %eax
andl $858993459, %edx
andl $-858993460, %eax
orl %eax, %edx
# Stage 3, nibbles: masks 0x0F0F0F0F (252645135) and
# 0xF0F0F0F0 (-252645136).
movl %edx, %eax
shrl $4, %eax
sall $4, %edx
andl $252645135, %eax
andl $-252645136, %edx
orl %edx, %eax
xorl %edx, %edx # <- edx is cleared here
# Stage 4, bytes: masks 0x00FF00FF (16711935) and 0xFF00FF00 (-16711936).
# The working value now lives in %eax/%ecx because %edx was just zeroed.
movl %eax, %ecx
shrl $8, %ecx
sall $8, %eax
andl $16711935, %ecx
andl $-16711936, %eax
orl %eax, %ecx
# final swap starts here
# Stage 5, halfwords: masks 0x0000FFFF (65535) and 0xFFFF0000 (-65536).
# A 32-bit shrl $16 leaves only the low 16 bits and sall $16 only the high
# 16 bits, so the two andl instructions cannot change the values.  The
# uint32_t variant replaces this whole sequence with a single roll $16.
movl %ecx, %eax
shrl $16, %eax
sall $16, %ecx
andl $65535, %eax # <- redundant after shift
andl $-65536, %ecx # <- redundant after shift
orl %ecx, %eax
ret
32-bit version (return type uint32_t):
# Compiler output for the variant of bitreverse2 that returns uint32_t.
# The first four stages are shift/mask/OR sequences using the REV32STEP
# masks and their 32-bit complements, printed as signed decimals; the last
# stage is a single rotate.
bitreverse2:
# Fetch the argument n from the stack.
movl 4(%esp), %edx
# Stage 1, odd/even bits: masks 0x55555555 (1431655765) and
# 0xAAAAAAAA (-1431655766).  addl %edx, %edx is the left shift by 1.
movl %edx, %eax
addl %edx, %edx
shrl %eax
andl $-1431655766, %edx
andl $1431655765, %eax
orl %edx, %eax
# Stage 2, bit pairs: masks 0x33333333 (858993459) and
# 0xCCCCCCCC (-858993460).
movl %eax, %edx
shrl $2, %edx
sall $2, %eax
andl $858993459, %edx
andl $-858993460, %eax
orl %eax, %edx
# Stage 3, nibbles: masks 0x0F0F0F0F (252645135) and
# 0xF0F0F0F0 (-252645136).
movl %edx, %ecx
shrl $4, %ecx
sall $4, %edx
andl $252645135, %ecx
andl $-252645136, %edx
orl %edx, %ecx
# Stage 4, bytes: masks 0x00FF00FF (16711935) and 0xFF00FF00 (-16711936).
movl %ecx, %eax
shrl $8, %eax
sall $8, %ecx
andl $16711935, %eax
andl $-16711936, %ecx
orl %ecx, %eax
# Stage 5, halfwords: the swap of the two 16-bit halves is done as one
# 32-bit rotate by 16.
roll $16, %eax # <- final swap
ret
This is with compiler sources from CVS as of about 6AM this morning US/Eastern.
--
Summary: returning 64-bit value turns off some 32-bit optimizations
Product: gcc
Version: unknown
Status: UNCONFIRMED
Severity: minor
Priority: P2
Component: rtl-optimization
AssignedTo: unassigned at gcc dot gnu dot org
ReportedBy: raeburn at raeburn dot org
CC: gcc-bugs at gcc dot gnu dot org
GCC build triplet: i686-pc-linux-gnu
GCC host triplet: i686-pc-linux-gnu
GCC target triplet: i686-pc-linux-gnu
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=23811