Hello, I am unable to see the expected performance gain from vectorization on PowerPC running SUSE Linux. I've prepared a simple test and compiled it once with the vectorization flags and once without them. I'd appreciate it if someone could point out what I'm doing wrong here. Below are the results of the test runs: time ./TestNoVec 92200 8 89720 1000 real 0m23.549s
time ./TestVec 92200 8 89720 1000 real 0m22.845s Here is the code: #include <iostream> #include <stdio.h> #include <stdlib.h> typedef float ARRTYPE; int main ( int argc, char *argv[] ) { int m_nSamples = atoi( argv[1] ); int itBegin = atoi( argv[2] ); int itEnd = atoi( argv[3] ); int iSizeMain = atoi( argv[ 4 ] ); ARRTYPE *pSum1 = new ARRTYPE[ 100000 ]; ARRTYPE *pSum = new ARRTYPE[ 100000 ]; for ( int it = 0; it < m_nSamples; it++ ) { pSum[ it ] = it / itBegin; pSum1[ it ] = itBegin / ( it + 1 ); } ARRTYPE *pVec1 = (ARRTYPE*) malloc (sizeof(ARRTYPE) *m_nSamples); ARRTYPE *pVec2 = (ARRTYPE*) malloc (sizeof(ARRTYPE) *m_nSamples); for ( int i = 0, j = 0; i < m_nSamples - 5; i++ ) { for( int it = itBegin; it < itEnd; it++ ) pVec1[ it ] += pSum[ it ] + pSum1[ it ]; } free( pVec1 ); free( pVec2 ); } Compilation flag for No vectorization: gcc -DTIXML_USE_STL -I /home/build/build -I /home/build/build -I. -I /usr/local/include -I /usr/include -O3 -fomit-frame-pointer -mtune=powerpc -falign-functions=16 -fprefetch-loop-arrays -fpeel-loops -funswitch-loops -fPIC -mcpu=powerpc -m64 -fargument-noalias -funroll-loops -ftree-vectorizer-verbose=7 -fdump-tree-vect-details -c -o Test.o Test.cpp gcc -lpthread -lz -lm -lstdc++ -DTIXML_USE_STL -I /home/build/build -I /home/build/build -I. -I /usr/local/include -I /usr/include -O3 -fomit-frame-pointer -mtune=powerpc -falign-functions=16 -fprefetch-loop-arrays -fpeel-loops -funswitch-loops -fPIC -mcpu=powerpc -m64 -fargument-noalias -funroll-loops -ftree-vectorizer-verbose=7 -fdump-tree-vect-details -L/usr/local/lib64 -DTIXML_USE_STL -pthread -L. -L /home/build/build/lib64 -L /home/build/build/lib64 -L /usr/lib64 -L /lib64 -L /opt/gnome/lib64 -o TestNoVec Test.o Compilation of vectorized code: gcc -DTIXML_USE_STL -I /home/build/build -I /home/build/build -I. 
-I /usr/local/include -I /usr/include -O3 -fomit-frame-pointer -mtune=powerpc -falign-functions=16 -fprefetch-loop-arrays -fpeel-loops -funswitch-loops -ftree-vectorize -fPIC -mcpu=powerpc -maltivec -mabi=altivec -m64 -fargument-noalias -funroll-loops -ftree-vectorizer-verbose=7 -fdump-tree-vect-details -c -o Test.o Test.cpp gcc -lpthread -lz -lm -lstdc++ -DTIXML_USE_STL -I /home/build/build -I /home/build/build -I. -I /usr/local/include -I /usr/include -O3 -fomit-frame-pointer -mtune=powerpc -falign-functions=16 -fprefetch-loop-arrays -fpeel-loops -funswitch-loops -ftree-vectorize -fPIC -mcpu=powerpc -maltivec -mabi=altivec -m64 -fargument-noalias -funroll-loops -ftree-vectorizer-verbose=7 -fdump-tree-vect-details -L/usr/local/lib64 -DTIXML_USE_STL -pthread -L. -L /home/build/build/lib64 -L /home/build/build/lib64 -L /usr/lib64 -L /lib64 -L /opt/gnome/lib64 -o TestVec Test.o -- Summary: Vectorization on power PC Product: gcc Version: 4.3.0 Status: UNCONFIRMED Severity: major Priority: P3 Component: c++ AssignedTo: unassigned at gcc dot gnu dot org ReportedBy: eyal at geomage dot com GCC build triplet: gcc (GCC) 4.3.0 20071124 (experimental) GCC host triplet: PowerPC GCC target triplet: PowerPC http://gcc.gnu.org/bugzilla/show_bug.cgi?id=35117