------- Additional Comments From uros at kss-loka dot si 2005-08-17 09:54 -------
Cross-compiling from i686 to x86_64, I got functionally equivalent asm for 32-bit and 64-bit mode:

gcc -O2 -m64:

.LFB128:
        subq    $24, %rsp
.LCFI0:
        leaq    20(%rsp), %rax
        leaq    24(%rsp), %rdx
        cmpq    %rax, %rdx
        jbe     .L7
        movq    .LC5(%rip), %mm0
        pxor    %mm1, %mm1
        packuswb        %mm1, %mm0
        movq    %mm0, (%rsp)
        movq    (%rsp), %rax
        incl    %eax
        jne     .L4
        emms
        ...

gcc -m32 -mmmx:

        pushl   %ebp
        movl    %esp, %ebp
        subl    $40, %esp
        leal    -4(%ebp), %eax
        andl    $-16, %esp
        subl    $16, %esp
        cmpl    %eax, %ebp
        jbe     .L7
        movq    .LC6, %mm0
        pxor    %mm1, %mm1
        packuswb        %mm1, %mm0
        movd    %mm0, -20(%ebp)
        movl    -20(%ebp), %eax
        incl    %eax
        jne     .L4
        emms
        ...

Tested with gcc version 4.1.0 20050716 (experimental)

--
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=22432