The following test generates unoptimized code for test_c(). The generated code
should look like the code generated for test_asm():

--cut here--
/* 32-bit (SImode) and 64-bit (DImode) unsigned integer types, fixed in
   width via GCC's mode attribute regardless of the target's int sizes.  */
typedef unsigned SI __attribute__ ((mode (SI)));
typedef unsigned DI __attribute__ ((mode (DI)));


/* Portable C version of a 64-bit add of two high:low 32-bit pairs:
   widen (ah:al) and (bh:bl) to DI, add, then split the DI sum back
   into (sh:sl).  This is the form that currently compiles badly.  */
#define add_ssaaaa_c(sh, sl, ah, al, bh, bl)    \
 {                                              \
   DI __a = (al) | ((DI) ah << 32);             \
   DI __b = (bl) | ((DI) bh << 32);             \
                                                \
   DI __c = __a + __b;                          \
                                                \
   (sl) = (SI) (__c & 0xffffffff);              \
   (sh) = (SI) (__c >> 32);                     \
 }


/* i386 inline-asm version of the same 64-bit add: addl adds the low
   words, adcl folds the resulting carry into the high words.  The
   "0"/"1" matching constraints tie ah/al to the two output operands
   (the leading "%" marks each pair as commutative), and "&" makes sl
   an early-clobber output.  This is the code shape the C version above
   should also compile down to.  */
#define add_ssaaaa_asm(sh, sl, ah, al, bh, bl)  \
 __asm__ ("addl %5,%1\n\tadcl %3,%0"            \
           : "=r" ((SI) (sh)),                  \
             "=&r" ((SI) (sl))                  \
           : "%0" ((SI) (ah)),                  \
             "g" ((SI) (bh)),                   \
             "%1" ((SI) (al)),                  \
             "g" ((SI) (bl)))


/* Driver for the C macro.  x and y are volatile so the stores of the
   sum's halves cannot be optimized away; compare the gcc -O2 output
   below against test_asm's.  */
void test_c (SI a, SI b, SI c, SI d)
{
 volatile SI x, y;


 add_ssaaaa_c (x, y, a, b, c, d);
}


/* Driver for the inline-asm macro — the reference for the code that
   test_c should ideally produce.  volatile keeps the stores to x/y.  */
void test_asm (SI a, SI b, SI c, SI d)
{
 volatile SI x, y;


 add_ssaaaa_asm (x, y, a, b, c, d);
}
--cut here--

gcc -O2 -fomit-frame-pointer:

test_c:
       subl    $28, %esp       #,
       xorl    %edx, %edx      #
       movl    40(%esp), %eax  # c, tmp66
       movl    44(%esp), %ecx  # d, d
       movl    %esi, 20(%esp)  #,
       movl    %ebx, 16(%esp)  #,
       movl    %eax, %edx      # tmp66,
       movl    $0, %eax        #, tmp66
       movl    %eax, %esi      #, tmp74
       movl    32(%esp), %eax  # a, tmp70
       movl    %edx, %ebx      # tmp75, __c
       orl     %ecx, %esi      # d, tmp74
       xorl    %edx, %edx      #
       movl    %esi, %ecx      # tmp74, __c
       movl    36(%esp), %esi  # b, b
       movl    %edi, 24(%esp)  #,
       movl    24(%esp), %edi  #,
       movl    %eax, %edx      # tmp70,
       movl    $0, %eax        #, tmp70
       orl     %esi, %eax      # b, tmp72
       movl    20(%esp), %esi  #,
       addl    %eax, %ecx      # tmp72, __c
       adcl    %edx, %ebx      #, __c
       movl    %ecx, 8(%esp)   # __c, y
       movl    %ebx, %ecx      # __c, __c
       xorl    %ebx, %ebx      # __c
       movl    16(%esp), %ebx  #,
       movl    %ecx, 12(%esp)  # __c, x
       addl    $28, %esp       #,
       ret


test_asm:
       subl    $16, %esp       #,
       movl    20(%esp), %eax  # a, a
       movl    24(%esp), %edx  # b, b
#APP
       addl 32(%esp),%edx      # d, tmp63
       adcl 28(%esp),%eax      # c, tmp62
#NO_APP
       movl    %eax, 12(%esp)  # tmp62, x
       movl    %edx, 8(%esp)   # tmp63, y
       addl    $16, %esp       #,
       ret

This issue needs to be fixed in order to implement effective i386 (DImode) and
x86_64 (TImode) wide operations in longlong.h. As discussed in thread starting
at [1], it was found that the problem is that wide operations are split after
reload [2] due to the FLAGS_REG link between "add" and "adc" insns. FLAGS_REG can
be accidentally clobbered by reload.

[1] http://gcc.gnu.org/ml/gcc-patches/2007-05/msg01084.html
[2] http://gcc.gnu.org/ml/gcc-patches/2007-05/msg01187.html


-- 
           Summary: Wide operations (i.e. adddi3) are split too late
           Product: gcc
           Version: 4.3.0
            Status: UNCONFIRMED
          Severity: enhancement
          Priority: P3
         Component: target
        AssignedTo: unassigned at gcc dot gnu dot org
        ReportedBy: ubizjak at gmail dot com
 GCC build triplet: i686-pc-linux-gnu
  GCC host triplet: i686-pc-linux-gnu
GCC target triplet: i686-pc-linux-gnu


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=31985

Reply via email to