Consider the following code:

$ cat t.cc
#include <stdint.h>
static inline uint32_t delinearize(uint32_t d)
{
        return d*0x8088405+1;
}
uint64_t delinearize64(uint64_t d)
{
        return (uint64_t(delinearize(d))<<32) | delinearize(d>>32);
}

$ gcc -v
Using built-in specs.
Target: i686-pc-linux-gnu
Configured with: /var/tmp/portage/gcc-4.1.0/work/gcc-4.1.0/configure
--prefix=/usr --bindir=/usr/i686-pc-linux-gnu/gcc-bin/4.1.0
--includedir=/usr/lib/gcc/i686-pc-linux-gnu/4.1.0/include
--datadir=/usr/share/gcc-data/i686-pc-linux-gnu/4.1.0
--mandir=/usr/share/gcc-data/i686-pc-linux-gnu/4.1.0/man
--infodir=/usr/share/gcc-data/i686-pc-linux-gnu/4.1.0/info
--with-gxx-include-dir=/usr/lib/gcc/i686-pc-linux-gnu/4.1.0/include/g++-v4
--host=i686-pc-linux-gnu --build=i686-pc-linux-gnu --disable-altivec
--enable-nls --without-included-gettext --with-system-zlib --disable-checking
--disable-werror --disable-libunwind-exceptions --disable-multilib
--disable-libmudflap --disable-libssp --disable-libgcj --enable-languages=c,c++
--enable-shared --enable-threads=posix --enable-__cxa_atexit
--enable-clocale=gnu
Thread model: posix
gcc version 4.1.0 (Gentoo 4.1.0)

$ g++ -fomit-frame-pointer -O2 -o t.S -S t.cc
$ cat t.S
        .file   "t.cc"
        .text
        .align 2
        .p2align 4,,15
.globl _Z13delinearize64y
        .type   _Z13delinearize64y, @function
_Z13delinearize64y:
.LFB3:
        pushl   %ebx
.LCFI0:
        xorl    %edx, %edx
        movl    8(%esp), %ecx
        movl    12(%esp), %ebx
        imull   $134775813, %ecx, %eax
        movl    %ebx, %ecx
        xorl    %ebx, %ebx
        imull   $134775813, %ecx, %ecx
        popl    %ebx
        incl    %eax
        movl    %eax, %edx
        movl    $0, %eax
        incl    %ecx
        orl     %ecx, %eax
        ret
.LFE3:
        .size   _Z13delinearize64y, .-_Z13delinearize64y
.globl __gxx_personality_v0
        .ident  "GCC: (GNU) 4.1.0 (Gentoo 4.1.0)"
        .section        .note.GNU-stack,"",@progbits

Well, that's way too complicated for such a "simple" operation. The whole
function can be written as:

_Z13delinearize64y:
        # No registers are pushed here, so only the return address lies below
        # the argument: 4(%esp) = low 32 bits of d, 8(%esp) = high 32 bits.
        # (The compiler's version reads 8/12(%esp) only because it first does
        # "pushl %ebx".)  The 64-bit result is returned in %edx:%eax.
        imull   $134775813, 4(%esp), %edx       # %edx = lo(d) * 0x8088405
        imull   $134775813, 8(%esp), %eax       # %eax = hi(d) * 0x8088405
        incl    %edx                            # high word = delinearize(lo(d))
        incl    %eax                            # low word  = delinearize(hi(d))
        ret

I also tested a gcc-4.2.0-alpha20060415 snapshot, which generates different
(but not really better) code.


-- 
           Summary: Missing optimizations with uint64_t
           Product: gcc
           Version: 4.1.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: tree-optimization
        AssignedTo: unassigned at gcc dot gnu dot org
        ReportedBy: sb at biallas dot net


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=27202

Reply via email to