Hi all,
gcc-4.2.2 seems to be generating wrong/misaligned code for movapd.

I have used the same test case mentioned here (for a very similar bug):
http://gcc.gnu.org/bugzilla/attachment.cgi?id=6012


The relevant information about the version and the files is as follows:

The version of gcc:
gcc -v
Using built-in specs.
Target: i386-redhat-linux
Configured with: ../../src/gcc-4.2.2/configure
--prefix=/depot/gcc-4.2.2-static --disable-shared
--enable-threads=posix --disable-checking --with-system-zlib
--enable-__cxa_atexit --disable-libunwind-exceptions
--enable-languages=c,c++,objc,fortran --with-cpu=generic
--host=i386-redhat-linux
Thread model: posix

$ gcc -g -O2 -funsigned-bitfields -fsigned-char -ffloat-store -Wformat
 -msse2 -mfpmath=sse  -c sse.c
$ objdump -Sd sse.o > sse_asm.txt

$ grep movapd sse_asm.txt
  a3:   66 0f 28 c2             movapd %xmm2,%xmm0
  cb:   66 0f 28 c1             movapd %xmm1,%xmm0
  f6:   66 0f 28 c4             movapd %xmm4,%xmm0
 165:   66 0f 29 9d 38 fe ff    movapd %xmm3,0xfffffe38(%ebp) #<============
 185:   66 0f 28 9d 38 fe ff    movapd 0xfffffe38(%ebp),%xmm3


Is this a known issue? If so, are there any suggested
workarounds (other than upgrading to a later version :-) )?

Regards,
Gowri Kumar
double sin(double x);
double cos(double x);

/* Three-component vector of doubles; members are declared in x, y, z order. */
typedef struct {
    double x;
    double y;
    double z;
} DVECTOR;

/* Four DVECTOR entries, m[0] through m[3], held in a single array member. */
typedef struct {
    DVECTOR m[4];
} DMATRIX;


/*
 * Fill *xfp from a latitude/longitude pair.
 *
 * m[0]..m[2] are written with sin/cos values of lat and lon (and products
 * of them); m[3] is then derived from those entries and from the vector
 * that geo_lla_xyz() produces for the input (lat, lon, 0.0).
 *
 * xfp  destination; every x/y/z member of m[0]..m[3] is assigned
 * lat  passed directly to sin()/cos() -- presumably radians, TODO confirm
 * lon  passed directly to sin()/cos() -- presumably radians, TODO confirm
 *
 * NOTE(review): geo_lla_xyz() has no visible declaration in this snippet,
 * so the call below relies on an implicit declaration; what it writes to
 * its second argument is assumed here, not verified.
 */
void
create_geo_to_topo(DMATRIX *xfp, double lat, double lon)
{
    double cos_lat;

    {
  double sin_lon, cos_lon, sin_lat;

  /* Evaluate each trigonometric term once; all are reused below. */
  sin_lon = sin(lon);
  cos_lon = cos(lon);
  sin_lat = sin(lat);
  cos_lat = cos(lat);

  /* x members of m[0]..m[2]: depend on lon only. */
  xfp->m[0].x = -sin_lon;
        xfp->m[1].x =  cos_lon;
        xfp->m[2].x = 0.0;
        
        /* y members of m[0]..m[2]. */
        xfp->m[0].y = -cos_lon  * sin_lat;
        xfp->m[1].y = -sin_lon  * sin_lat;
        xfp->m[2].y = cos_lat;

        /* z members of m[0]..m[2]. */
        xfp->m[0].z = cos_lon  * cos_lat;
        xfp->m[1].z = sin_lon  * cos_lat;
        xfp->m[2].z = sin_lat;

        {
            DVECTOR geo;
            DVECTOR lla;

            /* Input triple for geo_lla_xyz(): lat, lon, and a zero third component. */
            lla.x = lat;
            lla.y = lon;
            lla.z = 0.0;
            /* geo is uninitialised here; presumably geo_lla_xyz() fills it -- TODO confirm. */
            geo_lla_xyz( &lla, &geo);


            /*
             * Each member of m[3] is the negated dot product of geo with the
             * matching members (x, y or z) of m[0], m[1] and m[2].
             */
            xfp->m[3].x = -geo.x * xfp->m[0].x -
                           geo.y * xfp->m[1].x -
                           geo.z * xfp->m[2].x;
            xfp->m[3].y = -geo.x * xfp->m[0].y -
                           geo.y * xfp->m[1].y -
                           geo.z * xfp->m[2].y;
            xfp->m[3].z = -geo.x * xfp->m[0].z -
                           geo.y * xfp->m[1].z -
                           geo.z * xfp->m[2].z; 
        }
    }
}

Attachment: sse.i
Description: Binary data

Attachment: sse.s
Description: Binary data

sse.o:     file format elf32-i386

Disassembly of section .text:

00000000 <create_geo_to_topo>:


void
create_geo_to_topo(DMATRIX *xfp, double lat, double lon)
{
   0:   55                      push   %ebp
   1:   89 e5                   mov    %esp,%ebp
   3:   53                      push   %ebx
   4:   81 ec d4 01 00 00       sub    $0x1d4,%esp
   a:   8b 45 0c                mov    0xc(%ebp),%eax
   d:   8b 5d 08                mov    0x8(%ebp),%ebx
  10:   89 85 50 fe ff ff       mov    %eax,0xfffffe50(%ebp)
  16:   8b 45 10                mov    0x10(%ebp),%eax
  19:   89 85 54 fe ff ff       mov    %eax,0xfffffe54(%ebp)
  1f:   8b 45 14                mov    0x14(%ebp),%eax
  22:   89 85 48 fe ff ff       mov    %eax,0xfffffe48(%ebp)
  28:   8b 45 18                mov    0x18(%ebp),%eax
  2b:   89 85 4c fe ff ff       mov    %eax,0xfffffe4c(%ebp)
    double cos_lat;

    {
  double sin_lon, cos_lon, sin_lat;

  sin_lon = sin(lon);
  31:   f2 0f 10 85 48 fe ff    movsd  0xfffffe48(%ebp),%xmm0
  38:   ff 
  39:   f2 0f 11 04 24          movsd  %xmm0,(%esp)
  3e:   e8 fc ff ff ff          call   3f <create_geo_to_topo+0x3f>
  cos_lon = cos(lon);
  43:   f2 0f 10 85 48 fe ff    movsd  0xfffffe48(%ebp),%xmm0
  4a:   ff 
  4b:   f2 0f 11 04 24          movsd  %xmm0,(%esp)
  50:   dd 5d e8                fstpl  0xffffffe8(%ebp)
  53:   e8 fc ff ff ff          call   54 <create_geo_to_topo+0x54>
  sin_lat = sin(lat);
  58:   f2 0f 10 85 50 fe ff    movsd  0xfffffe50(%ebp),%xmm0
  5f:   ff 
  60:   f2 0f 11 04 24          movsd  %xmm0,(%esp)
  65:   dd 5d e0                fstpl  0xffffffe0(%ebp)
  68:   e8 fc ff ff ff          call   69 <create_geo_to_topo+0x69>
  cos_lat = cos(lat);
  6d:   f2 0f 10 85 50 fe ff    movsd  0xfffffe50(%ebp),%xmm0
  74:   ff 
  75:   f2 0f 11 04 24          movsd  %xmm0,(%esp)
  7a:   dd 5d d8                fstpl  0xffffffd8(%ebp)
  7d:   e8 fc ff ff ff          call   7e <create_geo_to_topo+0x7e>

  xfp->m[0].x = -sin_lon;
        xfp->m[1].x =  cos_lon;
        xfp->m[2].x = 0.0;
        
        xfp->m[0].y = -cos_lon  * sin_lat;
        xfp->m[1].y = -sin_lon  * sin_lat;
        xfp->m[2].y = cos_lat;

        xfp->m[0].z = cos_lon  * cos_lat;
        xfp->m[1].z = sin_lon  * cos_lat;
        xfp->m[2].z = sin_lat;

        {
            DVECTOR geo;
            DVECTOR lla;

            lla.x = lat;
            lla.y = lon;
            lla.z = 0.0;
            geo_lla_xyz( &lla, &geo);
  82:   8d 85 70 fe ff ff       lea    0xfffffe70(%ebp),%eax
  88:   f2 0f 10 55 e8          movsd  0xffffffe8(%ebp),%xmm2
  8d:   f2 0f 10 4d e0          movsd  0xffffffe0(%ebp),%xmm1
  92:   f2 0f 10 65 d8          movsd  0xffffffd8(%ebp),%xmm4
  97:   f2 0f 10 1d 00 00 00    movsd  0x0,%xmm3
  9e:   00 
  9f:   66 0f 57 ed             xorpd  %xmm5,%xmm5
  a3:   66 0f 28 c2             movapd %xmm2,%xmm0
  a7:   66 0f 57 c3             xorpd  %xmm3,%xmm0
  ab:   f2 0f 11 45 d0          movsd  %xmm0,0xffffffd0(%ebp)
  b0:   f2 0f 10 45 d0          movsd  0xffffffd0(%ebp),%xmm0
  b5:   dd 5d f0                fstpl  0xfffffff0(%ebp)
  b8:   f2 0f 11 03             movsd  %xmm0,(%ebx)
  bc:   f2 0f 11 4b 18          movsd  %xmm1,0x18(%ebx)
  c1:   f2 0f 11 6b 30          movsd  %xmm5,0x30(%ebx)
  c6:   f2 0f 59 55 f0          mulsd  0xfffffff0(%ebp),%xmm2
  cb:   66 0f 28 c1             movapd %xmm1,%xmm0
  cf:   f2 0f 59 4d f0          mulsd  0xfffffff0(%ebp),%xmm1
  d4:   66 0f 57 c3             xorpd  %xmm3,%xmm0
  d8:   f2 0f 11 45 c8          movsd  %xmm0,0xffffffc8(%ebp)
  dd:   f2 0f 10 45 c8          movsd  0xffffffc8(%ebp),%xmm0
  e2:   f2 0f 59 45 d8          mulsd  0xffffffd8(%ebp),%xmm0
  e7:   f2 0f 11 45 c0          movsd  %xmm0,0xffffffc0(%ebp)
  ec:   f2 0f 10 45 c0          movsd  0xffffffc0(%ebp),%xmm0
  f1:   f2 0f 11 43 08          movsd  %xmm0,0x8(%ebx)
  f6:   66 0f 28 c4             movapd %xmm4,%xmm0
  fa:   f2 0f 59 45 d0          mulsd  0xffffffd0(%ebp),%xmm0
  ff:   f2 0f 11 45 b8          movsd  %xmm0,0xffffffb8(%ebp)
 104:   f2 0f 10 45 b8          movsd  0xffffffb8(%ebp),%xmm0
 109:   f2 0f 11 43 20          movsd  %xmm0,0x20(%ebx)
 10e:   f2 0f 10 45 f0          movsd  0xfffffff0(%ebp),%xmm0
 113:   f2 0f 11 43 38          movsd  %xmm0,0x38(%ebx)
 118:   f2 0f 11 4d b0          movsd  %xmm1,0xffffffb0(%ebp)
 11d:   f2 0f 10 45 b0          movsd  0xffffffb0(%ebp),%xmm0
 122:   f2 0f 11 43 10          movsd  %xmm0,0x10(%ebx)
 127:   f2 0f 11 55 a8          movsd  %xmm2,0xffffffa8(%ebp)
 12c:   f2 0f 10 45 a8          movsd  0xffffffa8(%ebp),%xmm0
 131:   f2 0f 11 63 40          movsd  %xmm4,0x40(%ebx)
 136:   f2 0f 11 43 28          movsd  %xmm0,0x28(%ebx)
 13b:   f2 0f 10 85 50 fe ff    movsd  0xfffffe50(%ebp),%xmm0
 142:   ff 
 143:   f2 0f 11 85 58 fe ff    movsd  %xmm0,0xfffffe58(%ebp)
 14a:   ff 
 14b:   f2 0f 10 85 48 fe ff    movsd  0xfffffe48(%ebp),%xmm0
 152:   ff 
 153:   f2 0f 11 85 60 fe ff    movsd  %xmm0,0xfffffe60(%ebp)
 15a:   ff 
 15b:   89 44 24 04             mov    %eax,0x4(%esp)
 15f:   8d 85 58 fe ff ff       lea    0xfffffe58(%ebp),%eax
 165:   66 0f 29 9d 38 fe ff    movapd %xmm3,0xfffffe38(%ebp)
 16c:   ff 
 16d:   f2 0f 11 ad 68 fe ff    movsd  %xmm5,0xfffffe68(%ebp)
 174:   ff 
 175:   89 04 24                mov    %eax,(%esp)
 178:   e8 fc ff ff ff          call   179 <create_geo_to_topo+0x179>


            xfp->m[3].x = -geo.x * xfp->m[0].x -
 17d:   f2 0f 10 a5 70 fe ff    movsd  0xfffffe70(%ebp),%xmm4
 184:   ff 
 185:   66 0f 28 9d 38 fe ff    movapd 0xfffffe38(%ebp),%xmm3
 18c:   ff 
 18d:   f2 0f 10 95 78 fe ff    movsd  0xfffffe78(%ebp),%xmm2
 194:   ff 
 195:   f2 0f 10 8d 80 fe ff    movsd  0xfffffe80(%ebp),%xmm1
 19c:   ff 
 19d:   f2 0f 11 65 a0          movsd  %xmm4,0xffffffa0(%ebp)
 1a2:   f2 0f 10 45 a0          movsd  0xffffffa0(%ebp),%xmm0
 1a7:   66 0f 57 c3             xorpd  %xmm3,%xmm0
 1ab:   f2 0f 11 45 98          movsd  %xmm0,0xffffff98(%ebp)
 1b0:   f2 0f 10 03             movsd  (%ebx),%xmm0
 1b4:   f2 0f 11 55 80          movsd  %xmm2,0xffffff80(%ebp)
 1b9:   f2 0f 11 45 90          movsd  %xmm0,0xffffff90(%ebp)
 1be:   f2 0f 10 45 98          movsd  0xffffff98(%ebp),%xmm0
 1c3:   f2 0f 59 45 90          mulsd  0xffffff90(%ebp),%xmm0
 1c8:   f2 0f 11 45 88          movsd  %xmm0,0xffffff88(%ebp)
 1cd:   f2 0f 10 43 18          movsd  0x18(%ebx),%xmm0
 1d2:   f2 0f 11 8d 60 ff ff    movsd  %xmm1,0xffffff60(%ebp)
 1d9:   ff 
 1da:   f2 0f 11 85 78 ff ff    movsd  %xmm0,0xffffff78(%ebp)
 1e1:   ff 
 1e2:   f2 0f 10 45 80          movsd  0xffffff80(%ebp),%xmm0
 1e7:   f2 0f 59 85 78 ff ff    mulsd  0xffffff78(%ebp),%xmm0
 1ee:   ff 
 1ef:   f2 0f 11 85 70 ff ff    movsd  %xmm0,0xffffff70(%ebp)
 1f6:   ff 
 1f7:   f2 0f 10 45 88          movsd  0xffffff88(%ebp),%xmm0
 1fc:   f2 0f 5c 85 70 ff ff    subsd  0xffffff70(%ebp),%xmm0
 203:   ff 
 204:   f2 0f 11 85 68 ff ff    movsd  %xmm0,0xffffff68(%ebp)
 20b:   ff 
 20c:   f2 0f 10 43 30          movsd  0x30(%ebx),%xmm0
 211:   f2 0f 11 85 58 ff ff    movsd  %xmm0,0xffffff58(%ebp)
 218:   ff 
 219:   f2 0f 10 85 60 ff ff    movsd  0xffffff60(%ebp),%xmm0
 220:   ff 
 221:   f2 0f 59 85 58 ff ff    mulsd  0xffffff58(%ebp),%xmm0
 228:   ff 
 229:   f2 0f 11 85 50 ff ff    movsd  %xmm0,0xffffff50(%ebp)
 230:   ff 
 231:   f2 0f 10 85 68 ff ff    movsd  0xffffff68(%ebp),%xmm0
 238:   ff 
 239:   f2 0f 5c 85 50 ff ff    subsd  0xffffff50(%ebp),%xmm0
 240:   ff 
 241:   f2 0f 11 85 48 ff ff    movsd  %xmm0,0xffffff48(%ebp)
 248:   ff 
 249:   f2 0f 10 85 48 ff ff    movsd  0xffffff48(%ebp),%xmm0
 250:   ff 
 251:   f2 0f 11 43 48          movsd  %xmm0,0x48(%ebx)
                           geo.y * xfp->m[1].x -
                           geo.z * xfp->m[2].x;
            xfp->m[3].y = -geo.x * xfp->m[0].y -
 256:   f2 0f 11 a5 90 fe ff    movsd  %xmm4,0xfffffe90(%ebp)
 25d:   ff 
 25e:   f2 0f 10 85 90 fe ff    movsd  0xfffffe90(%ebp),%xmm0
 265:   ff 
 266:   66 0f 57 c3             xorpd  %xmm3,%xmm0
 26a:   f2 0f 11 85 a0 fe ff    movsd  %xmm0,0xfffffea0(%ebp)
 271:   ff 
 272:   f2 0f 10 43 08          movsd  0x8(%ebx),%xmm0
 277:   f2 0f 11 95 b0 fe ff    movsd  %xmm2,0xfffffeb0(%ebp)
 27e:   ff 
 27f:   f2 0f 11 85 40 ff ff    movsd  %xmm0,0xffffff40(%ebp)
 286:   ff 
 287:   f2 0f 10 85 a0 fe ff    movsd  0xfffffea0(%ebp),%xmm0
 28e:   ff 
 28f:   f2 0f 59 85 40 ff ff    mulsd  0xffffff40(%ebp),%xmm0
 296:   ff 
 297:   f2 0f 11 85 38 ff ff    movsd  %xmm0,0xffffff38(%ebp)
 29e:   ff 
 29f:   f2 0f 10 43 20          movsd  0x20(%ebx),%xmm0
 2a4:   f2 0f 11 8d c0 fe ff    movsd  %xmm1,0xfffffec0(%ebp)
 2ab:   ff 
 2ac:   f2 0f 11 85 30 ff ff    movsd  %xmm0,0xffffff30(%ebp)
 2b3:   ff 
 2b4:   f2 0f 10 85 b0 fe ff    movsd  0xfffffeb0(%ebp),%xmm0
 2bb:   ff 
 2bc:   f2 0f 59 85 30 ff ff    mulsd  0xffffff30(%ebp),%xmm0
 2c3:   ff 
 2c4:   f2 0f 11 85 28 ff ff    movsd  %xmm0,0xffffff28(%ebp)
 2cb:   ff 
 2cc:   f2 0f 10 85 38 ff ff    movsd  0xffffff38(%ebp),%xmm0
 2d3:   ff 
 2d4:   f2 0f 5c 85 28 ff ff    subsd  0xffffff28(%ebp),%xmm0
 2db:   ff 
 2dc:   f2 0f 11 85 20 ff ff    movsd  %xmm0,0xffffff20(%ebp)
 2e3:   ff 
 2e4:   f2 0f 10 43 38          movsd  0x38(%ebx),%xmm0
 2e9:   f2 0f 11 85 18 ff ff    movsd  %xmm0,0xffffff18(%ebp)
 2f0:   ff 
 2f1:   f2 0f 10 85 c0 fe ff    movsd  0xfffffec0(%ebp),%xmm0
 2f8:   ff 
 2f9:   f2 0f 59 85 18 ff ff    mulsd  0xffffff18(%ebp),%xmm0
 300:   ff 
 301:   f2 0f 11 85 10 ff ff    movsd  %xmm0,0xffffff10(%ebp)
 308:   ff 
 309:   f2 0f 10 85 20 ff ff    movsd  0xffffff20(%ebp),%xmm0
 310:   ff 
 311:   f2 0f 5c 85 10 ff ff    subsd  0xffffff10(%ebp),%xmm0
 318:   ff 
 319:   f2 0f 11 85 08 ff ff    movsd  %xmm0,0xffffff08(%ebp)
 320:   ff 
 321:   f2 0f 10 85 08 ff ff    movsd  0xffffff08(%ebp),%xmm0
 328:   ff 
 329:   f2 0f 11 43 50          movsd  %xmm0,0x50(%ebx)
                           geo.y * xfp->m[1].y -
                           geo.z * xfp->m[2].y;
            xfp->m[3].z = -geo.x * xfp->m[0].z -
 32e:   f2 0f 11 a5 88 fe ff    movsd  %xmm4,0xfffffe88(%ebp)
 335:   ff 
 336:   f2 0f 10 85 88 fe ff    movsd  0xfffffe88(%ebp),%xmm0
 33d:   ff 
 33e:   66 0f 57 c3             xorpd  %xmm3,%xmm0
 342:   f2 0f 11 85 98 fe ff    movsd  %xmm0,0xfffffe98(%ebp)
 349:   ff 
 34a:   f2 0f 10 43 10          movsd  0x10(%ebx),%xmm0
 34f:   f2 0f 11 85 00 ff ff    movsd  %xmm0,0xffffff00(%ebp)
 356:   ff 
 357:   f2 0f 10 85 98 fe ff    movsd  0xfffffe98(%ebp),%xmm0
 35e:   ff 
 35f:   f2 0f 59 85 00 ff ff    mulsd  0xffffff00(%ebp),%xmm0
 366:   ff 
 367:   f2 0f 11 85 f8 fe ff    movsd  %xmm0,0xfffffef8(%ebp)
 36e:   ff 
 36f:   f2 0f 11 95 a8 fe ff    movsd  %xmm2,0xfffffea8(%ebp)
 376:   ff 
 377:   f2 0f 10 43 28          movsd  0x28(%ebx),%xmm0
 37c:   f2 0f 11 8d b8 fe ff    movsd  %xmm1,0xfffffeb8(%ebp)
 383:   ff 
 384:   f2 0f 11 85 f0 fe ff    movsd  %xmm0,0xfffffef0(%ebp)
 38b:   ff 
 38c:   f2 0f 10 85 a8 fe ff    movsd  0xfffffea8(%ebp),%xmm0
 393:   ff 
 394:   f2 0f 59 85 f0 fe ff    mulsd  0xfffffef0(%ebp),%xmm0
 39b:   ff 
 39c:   f2 0f 11 85 e8 fe ff    movsd  %xmm0,0xfffffee8(%ebp)
 3a3:   ff 
 3a4:   f2 0f 10 85 f8 fe ff    movsd  0xfffffef8(%ebp),%xmm0
 3ab:   ff 
 3ac:   f2 0f 5c 85 e8 fe ff    subsd  0xfffffee8(%ebp),%xmm0
 3b3:   ff 
 3b4:   f2 0f 11 85 e0 fe ff    movsd  %xmm0,0xfffffee0(%ebp)
 3bb:   ff 
 3bc:   f2 0f 10 43 40          movsd  0x40(%ebx),%xmm0
 3c1:   f2 0f 11 85 d8 fe ff    movsd  %xmm0,0xfffffed8(%ebp)
 3c8:   ff 
 3c9:   f2 0f 10 85 b8 fe ff    movsd  0xfffffeb8(%ebp),%xmm0
 3d0:   ff 
 3d1:   f2 0f 59 85 d8 fe ff    mulsd  0xfffffed8(%ebp),%xmm0
 3d8:   ff 
 3d9:   f2 0f 11 85 d0 fe ff    movsd  %xmm0,0xfffffed0(%ebp)
 3e0:   ff 
 3e1:   f2 0f 10 85 e0 fe ff    movsd  0xfffffee0(%ebp),%xmm0
 3e8:   ff 
 3e9:   f2 0f 5c 85 d0 fe ff    subsd  0xfffffed0(%ebp),%xmm0
 3f0:   ff 
 3f1:   f2 0f 11 85 c8 fe ff    movsd  %xmm0,0xfffffec8(%ebp)
 3f8:   ff 
 3f9:   f2 0f 10 85 c8 fe ff    movsd  0xfffffec8(%ebp),%xmm0
 400:   ff 
 401:   f2 0f 11 43 58          movsd  %xmm0,0x58(%ebx)
                           geo.y * xfp->m[1].z -
                           geo.z * xfp->m[2].z; 
        }
    }
}
 406:   81 c4 d4 01 00 00       add    $0x1d4,%esp
 40c:   5b                      pop    %ebx
 40d:   5d                      pop    %ebp
 40e:   c3                      ret    

Reply via email to