The following test generates unoptimized code for test_c(). The generated code should look like the code generated for test_asm():
--cut here-- typedef unsigned SI __attribute__ ((mode (SI))); typedef unsigned DI __attribute__ ((mode (DI))); #define add_ssaaaa_c(sh, sl, ah, al, bh, bl) \ { \ DI __a = (al) | ((DI) ah << 32); \ DI __b = (bl) | ((DI) bh << 32); \ \ DI __c = __a + __b; \ \ (sl) = (SI) (__c & 0xffffffff); \ (sh) = (SI) (__c >> 32); \ } #define add_ssaaaa_asm(sh, sl, ah, al, bh, bl) \ __asm__ ("addl %5,%1\n\tadcl %3,%0" \ : "=r" ((SI) (sh)), \ "=&r" ((SI) (sl)) \ : "%0" ((SI) (ah)), \ "g" ((SI) (bh)), \ "%1" ((SI) (al)), \ "g" ((SI) (bl))) void test_c (SI a, SI b, SI c, SI d) { volatile SI x, y; add_ssaaaa_c (x, y, a, b, c, d); } void test_asm (SI a, SI b, SI c, SI d) { volatile SI x, y; add_ssaaaa_asm (x, y, a, b, c, d); } --cut here-- gcc -O2 -fomit-frame-pointer: test_c: subl $28, %esp #, xorl %edx, %edx # movl 40(%esp), %eax # c, tmp66 movl 44(%esp), %ecx # d, d movl %esi, 20(%esp) #, movl %ebx, 16(%esp) #, movl %eax, %edx # tmp66, movl $0, %eax #, tmp66 movl %eax, %esi #, tmp74 movl 32(%esp), %eax # a, tmp70 movl %edx, %ebx # tmp75, __c orl %ecx, %esi # d, tmp74 xorl %edx, %edx # movl %esi, %ecx # tmp74, __c movl 36(%esp), %esi # b, b movl %edi, 24(%esp) #, movl 24(%esp), %edi #, movl %eax, %edx # tmp70, movl $0, %eax #, tmp70 orl %esi, %eax # b, tmp72 movl 20(%esp), %esi #, addl %eax, %ecx # tmp72, __c adcl %edx, %ebx #, __c movl %ecx, 8(%esp) # __c, y movl %ebx, %ecx # __c, __c xorl %ebx, %ebx # __c movl 16(%esp), %ebx #, movl %ecx, 12(%esp) # __c, x addl $28, %esp #, ret test_asm: subl $16, %esp #, movl 20(%esp), %eax # a, a movl 24(%esp), %edx # b, b #APP addl 32(%esp),%edx # d, tmp63 adcl 28(%esp),%eax # c, tmp62 #NO_APP movl %eax, 12(%esp) # tmp62, x movl %edx, 8(%esp) # tmp63, y addl $16, %esp #, ret This issue needs to be fixed in order to implement effective i386 (DImode) and x86_64 (TImode) wide operations in longlong.h. 
As discussed in the thread starting at [1], the problem was found to be that wide operations are split after reload [2] due to the FLAGS_REG link between the "add" and "adc" insns. FLAGS_REG can be accidentally clobbered by reload. [1] http://gcc.gnu.org/ml/gcc-patches/2007-05/msg01084.html [2] http://gcc.gnu.org/ml/gcc-patches/2007-05/msg01187.html -- Summary: Wide operations (i.e. adddi3) are split too late Product: gcc Version: 4.3.0 Status: UNCONFIRMED Severity: enhancement Priority: P3 Component: target AssignedTo: unassigned at gcc dot gnu dot org ReportedBy: ubizjak at gmail dot com GCC build triplet: i686-pc-linux-gnu GCC host triplet: i686-pc-linux-gnu GCC target triplet: i686-pc-linux-gnu http://gcc.gnu.org/bugzilla/show_bug.cgi?id=31985