#include <stdlib.h> 
 
/*
 * Reduced test case for the code-generation report below: allocate a
 * buffer with malloc() and store 0 into its first `synapsy` doubles.
 *
 * Takes no arguments and returns nothing.  The buffer is neither freed
 * nor handed back to a caller inside this function.
 * (`wagi` / `synapsy` are presumably Polish for "weights" / "synapses".)
 */
void dupa() 
{ 
        double* wagi; 
        unsigned int i,synapsy=100; 
 
        /* NOTE(review): malloc() takes a byte count, so this requests
         * 100*synapsy = 10000 bytes rather than `synapsy` doubles.  That is
         * more than the loop below touches if double is 8 bytes (as the
         * scale-8 stores in the quoted listings suggest).  The returned
         * pointer is not checked for NULL before it is written through. */
        wagi = (double*)malloc(100*synapsy); 
 
        /* The loop under discussion: one store of 0 (converted to double)
         * per element, for indices 0 .. synapsy-1. */
        for( i=0;i<synapsy;i++ ) { 
                wagi[i] = 0; 
        } 
 
} 
 
The above is a simple test case. It was compiled with GCC 4.0:
gcc-4.0 (GCC) 4.0.0 20050212 (experimental) 
g++-4.0 -pedantic --save-temps -ftree-vectorize -O3 -Wall -mtune=pentium3 -c 
test.c 
 
Essentially, I get:
.LFB15: 
        pushl   %ebp 
.LCFI0: 
        movl    %esp, %ebp 
.LCFI1: 
        subl    $8, %esp 
.LCFI2: 
        movl    $10000, (%esp) 
        call    malloc 
        movl    $1, %edx 
        .p2align 4,,15 
.L2: 
        xorl    %ecx, %ecx 
        movl    %ecx, -8(%eax,%edx,8) 
        xorl    %ecx, %ecx 
        movl    %ecx, -4(%eax,%edx,8) 
        incl    %edx 
        cmpl    $101, %edx 
        jne     .L2 
        leave 
        ret 
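So the xor on %ecx is executed twice inside the loop, on every iteration,
even though %ecx is already zero after the first xor and is not modified
by the stores in between.
It looks similar with 3.4: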
 
L5: 
        movl    $0, (%eax,%edx,8) 
        xorl    %ecx, %ecx 
        movl    %ecx, 4(%eax,%edx,8) 
        incl    %edx 
        cmpl    $100, %edx 
        jb      .L5 
 
 
on ultrasparc: 
.LLFB18: 
        save    %sp, -104, %sp 
.LLCFI0: 
        sethi   %hi(9216), %o0 
        call    malloc, 0 
         or     %o0, 784, %o0 
        mov     0, %g1 
.LL2: 
        add     %g1, %o0, %g2 
        add     %g1, 8, %g1 
        st      %g0, [%g2] 
        cmp     %g1, 800 
        bne     .LL2 
         st     %g0, [%g2+4] 
        jmp     %i7+8 
         restore 
 
It's odd because I do specify -O3 just to make sure code will be as fast as 
possible :) 
 
-O0 uses floating-point instructions to zero it, which is extremely slow.
-O1 uses floating-point instructions too, but the code is 3x smaller and neater:
        fldz 
.L2: 
        fstl    -8(%eax,%edx,8) 
        incl    %edx 
        cmpl    $101, %edx 
        jne     .L2 
        fstp    %st(0)

-- 
           Summary: xor is enclosed in loop, and exectuted on each iteration
                    of for statement
           Product: gcc
           Version: 4.0.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P2
         Component: c++
        AssignedTo: unassigned at gcc dot gnu dot org
        ReportedBy: gj at pointblue dot com dot pl
                CC: gcc-bugs at gcc dot gnu dot org
 GCC build triplet: i686
  GCC host triplet: i686
GCC target triplet: i686


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=19922

Reply via email to