------- Comment #14 from dirtyepic at gentoo dot org 2007-07-24 19:37 ------- Created an attachment (id=13965) --> (http://gcc.gnu.org/bugzilla/attachment.cgi?id=13965&action=view) zlib testcase
> A fix for PR25413 was committed to mainline. > Ryan, could you please check if it solves the zlib miscompilation? > Andrew, would you please check if it solves the libgcc miscompilation that you > are seeing? Unfortunately it doesn't. I'm still getting unaligned movdqa instructions with both mainline and the 4.2 patch. Both testcases on this bug now work, however, so maybe the problem lies elsewhere. I'm attaching a (badly) reduced testcase from inftrees.c in zlib which I believe shows the bug. Compile with -O -msse2 -ftree-vectorize. On i686 I'm getting this: inftrees.lo: file format elf32-i386 Disassembly of section .text: 00000000 <inflate_table>: 0: 55 push %ebp 1: 89 e5 mov %esp,%ebp 3: 53 push %ebx 4: 83 ec 24 sub $0x24,%esp 7: 8b 5d 0c mov 0xc(%ebp),%ebx a: 8b 4d 10 mov 0x10(%ebp),%ecx d: 8d 45 d8 lea -0x28(%ebp),%eax 10: 66 0f ef c0 pxor %xmm0,%xmm0 14: 66 0f 7f 00 movdqa %xmm0,(%eax) 18: 66 0f 7f 40 10 movdqa %xmm0,0x10(%eax) 1d: 85 c9 test %ecx,%ecx 1f: 74 16 je 37 <inflate_table+0x37> 21: ba 00 00 00 00 mov $0x0,%edx 26: 0f b7 04 53 movzwl (%ebx,%edx,2),%eax 2a: 66 83 44 45 d8 01 addw $0x1,-0x28(%ebp,%eax,2) 30: 83 c2 01 add $0x1,%edx 33: 39 ca cmp %ecx,%edx 35: 75 ef jne 26 <inflate_table+0x26> 37: b8 0f 00 00 00 mov $0xf,%eax 3c: 8d 55 d8 lea -0x28(%ebp),%edx 3f: 66 83 3c 42 00 cmpw $0x0,(%edx,%eax,2) 44: 75 05 jne 4b <inflate_table+0x4b> 46: 83 e8 01 sub $0x1,%eax 49: 75 f4 jne 3f <inflate_table+0x3f> 4b: 83 c4 24 add $0x24,%esp 4e: 5b pop %ebx 4f: 5d pop %ebp 50: c3 ret And without the vectorizer: inftrees.lo: file format elf32-i386 Disassembly of section .text: 00000000 <inflate_table>: 0: 55 push %ebp 1: 89 e5 mov %esp,%ebp 3: 53 push %ebx 4: 83 ec 20 sub $0x20,%esp 7: 8b 5d 0c mov 0xc(%ebp),%ebx a: 8b 4d 10 mov 0x10(%ebp),%ecx d: b8 00 00 00 00 mov $0x0,%eax 12: 8d 55 dc lea -0x24(%ebp),%edx 15: 66 c7 04 42 00 00 movw $0x0,(%edx,%eax,2) 1b: 83 c0 01 add $0x1,%eax 1e: 83 f8 10 cmp $0x10,%eax 21: 75 f2 jne 15 <inflate_table+0x15> 23: 85 c9 test 
%ecx,%ecx 25: 74 16 je 3d <inflate_table+0x3d> 27: ba 00 00 00 00 mov $0x0,%edx 2c: 0f b7 04 53 movzwl (%ebx,%edx,2),%eax 30: 66 83 44 45 dc 01 addw $0x1,-0x24(%ebp,%eax,2) 36: 83 c2 01 add $0x1,%edx 39: 39 ca cmp %ecx,%edx 3b: 75 ef jne 2c <inflate_table+0x2c> 3d: b8 0f 00 00 00 mov $0xf,%eax 42: 8d 55 dc lea -0x24(%ebp),%edx 45: 66 83 3c 42 00 cmpw $0x0,(%edx,%eax,2) 4a: 75 05 jne 51 <inflate_table+0x51> 4c: 83 e8 01 sub $0x1,%eax 4f: 75 f4 jne 45 <inflate_table+0x45> 51: 83 c4 20 add $0x20,%esp 54: 5b pop %ebx 55: 5d pop %ebp 56: c3 ret -- http://gcc.gnu.org/bugzilla/show_bug.cgi?id=25413