On 22/02/14 23:39, Sturla Molden wrote:

> Ok, next runner up is Accelerate. Let's see how it compares to OpenBLAS
> and MKL on Mavericks.

It seems Accelerate has roughly the same performance as MKL now.

Did the upgrade to Mavericks do this?



These are the compile lines, in case you are wondering:

$ cc -O2 -o perftest_openblas -I/opt/OpenBLAS/include -L/opt/OpenBLAS/lib perftest_openblas.c -lopenblas

$ cc -O2 -o perftest_accelerate perftest_accelerate.c -framework Accelerate

$ source /opt/intel/composer_xe_2013/mkl/bin/mklvars.sh intel64
$ icc -O2 -o perftest_mkl -mkl -static-intel perftest_mkl.c
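
The OpenBLAS and MKL test programs are essentially the same as the
Accelerate one below, just with a different CBLAS header. A rough sketch
(assuming OpenBLAS's cblas.h and MKL's mkl.h, not the exact files):

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <mach/mach_time.h>
#include <cblas.h>   /* OpenBLAS; an MKL build would include <mkl.h> instead */

int main(void)
{
    int n = 512;
    /* zero-filled operands; dgemm timing does not depend on the values */
    double *A = (double*)calloc(n*n, sizeof(double));
    double *B = (double*)calloc(n*n, sizeof(double));
    double *C = (double*)calloc(n*n, sizeof(double));

    mach_timebase_info_data_t tb;
    mach_timebase_info(&tb);

    uint64_t t0 = mach_absolute_time();
    cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans,
                n, n, n, 1.0, A, n, B, n, 1.0, C, n);
    uint64_t t1 = mach_absolute_time();

    /* mach ticks to nanoseconds via the timebase ratio */
    printf("elapsed time: %g ns\n",
           (double)(t1 - t0) * tb.numer / tb.denom);

    free(A); free(B); free(C);
    return 0;
}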




Sturla





#include <stdio.h>
#include <stdlib.h>
#include <mach/mach.h>
#include <mach/mach_time.h>
#include <Accelerate/Accelerate.h>

double nanodiff(const uint64_t _t0, const uint64_t _t1)
{   
    long double t0, t1, numer, denom, nanosec;
    mach_timebase_info_data_t tb_info;
    mach_timebase_info(&tb_info);
    numer = (long double)(tb_info.numer);
    denom = (long double)(tb_info.denom);    
    t0 = (long double)(_t0);
    t1 = (long double)(_t1);
    nanosec = (t1 - t0) * numer / denom;
    return (double)nanosec;
}

int main(int argc, char **argv)
{
    double nanosec;
    int n = 512;
    int m = n, k = n;
    /* calloc zero-fills the buffers: dgemm is called with beta = 1.0,
       so C is read as well as written, and A and B need defined values */
    double *A = (double*)calloc(n*n, sizeof(double));
    double *B = (double*)calloc(n*n, sizeof(double));
    double *C = (double*)calloc(n*n, sizeof(double));
    uint64_t t0, t1;
    
    t0 = mach_absolute_time();
    
    cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans,
           m, n, k, 1.0, A, k, B, n, 1.0, C, n);

    t1 = mach_absolute_time();
    
    nanosec = nanodiff(t0, t1);
    
    printf("elapsed time: %g ns\n", nanosec);

    free(A); free(B); free(C);
    return 0;
}

