Oops, the attachments! (always the same story) El dt 23 de 01 del 2007 a les 20:33 +0100, en/na Francesc Altet va escriure: > El 2007-01-22 23:59:03 GMT, en/na Pierre GM va escriure: > > Other example: what's more efficient ? > > myvar = <ndarray>numpy.empty(shape,dtype) > > or > > myvar = PyArray_EMPTY(dims, NPY_TYPE) > > Ok. The second one indeed, but possibly less than you realize (just a > 25%, see some benchmarks below). The main point is, as always, avoiding > premature optimization. You should first port your code to Pyrex, and > then concentrate on the hot spots. If some python calls are critical > for you, then, go ahead and choose the C call. > > > But elsewhere in the manual is given the example of a loop using `range`, > > when > > one should use some explicit interval, and my understanding was that using > > python expressions was not as efficient as having more proper C > > expressions. > > Is this the case here ? Do I have to reimplement __getitem__ on arrays, or > > could I just keep on using the current approach ? > > Special methods (like __getitem__) of Pyrex extensions perform exactly > the same as a __getitem__ made in pure C extensions. So, you don't > have to be worried about that. > > A benchmark that I've made (I was curious too ;), and that I'm > attaching, proves this. 
Here is the run on a pretty old machine: > > $ python run_bench.py > ******************** NumPy times ********************************* > time for __len__ (numpy)--> 0.203 > sum (numpy)--> 49987.2991813 > time for __getitem__ (numpy)--> 0.314 > ******************** Pyrex times ********************************* > time for __len__ (pyrex)--> 0.198 > sum (pyrex)--> 49987.2991813 > time for __getitem__ (pyrex)--> 0.172 > ********* Comparing NumPy creation times (python and C) ********** > time for creating an empty array (python)--> 3.305 > time for creating an empty array (C)--> 2.664 > > In this case, the __getitem__ of Pyrex seems to perform better than the > __getitem__ of the ndarray object written in C (almost 2x, in fact). > However, this is probably an illusion, as the ndarray __getitem__ will do > far more work than the Pyrex one. OTOH, the __len__ method is far > simpler, and can be taken as the demonstration that the overhead of > calling special methods in Pyrex from Python is similar to C > counterparts. Finally, the difference of overhead in using a Python or a > C call for creating an empty array is shown in the last part of the > benchmark. All in all, a 25% difference is not that much. > > Cheers, > -- Francesc Altet | Be careful about using the following code -- Carabos Coop. V. | I've only proven that it works, www.carabos.com | I haven't tested it. -- Donald Knuth
"""Here are some definitions for sharing between extensions.

This file only declares names (C standard library functions and parts of
the NumPy C API); it contains no executable code of its own.
"""

# Standard C functions.
cdef extern from "stdlib.h":
  # NOTE(review): declared here as `long`; presumably the real definition is
  # taken from the C header at compile time -- confirm.
  ctypedef long size_t
  void *malloc(size_t size)
  void free(void *ptr)

# API for NumPy objects
cdef extern from "numpy/arrayobject.h":

  # Types
  # NOTE(review): declared here as `int`; presumably the real (pointer-sized)
  # definition is taken from the NumPy header at compile time -- confirm.
  ctypedef int npy_intp

  # Functions
  object PyArray_GETITEM(object arr, void *itemptr)
  int PyArray_SETITEM(object arr, void *itemptr, object obj)
  object PyArray_EMPTY(int m, npy_intp *dims, int type, int fortran)

  # Classes
  # Extension-type view of the C struct PyArray_Descr, exposing the listed
  # fields to Pyrex code.
  ctypedef extern class numpy.dtype [object PyArray_Descr]:
    cdef int type_num, elsize, alignment
    cdef char type, kind, byteorder, hasobject
    cdef object fields, typeobj

  # Extension-type view of the C struct PyArrayObject, exposing the listed
  # fields to Pyrex code.
  ctypedef extern class numpy.ndarray [object PyArrayObject]:
    cdef char *data
    cdef int nd
    cdef npy_intp *dimensions
    cdef npy_intp *strides
    cdef object base
    cdef dtype descr
    cdef int flags

  # The NumPy initialization function
  void import_array()
# Default target: build the extension module in place.
all:
	python setup.py build_ext --inplace

# Build first, then run the benchmark script.
bench: all
	python run_bench.py

# None of these targets produces a file of the same name, so declare all of
# them phony; otherwise a stray file or directory called `all` or `bench`
# would make the corresponding target silently do nothing.
.PHONY: all bench clean

# Remove editor backups, shared objects, C files, object files and the
# build directory.  Note that `*.c` removes every C file in this directory.
clean:
	rm -rf *~ *.so *.c *.o build
definitions.pxd pyrex_bench.pyx run_bench.py setup.py
import numpy

from definitions cimport import_array, \
     malloc, free, npy_intp, \
     PyArray_GETITEM, PyArray_EMPTY, \
     ndarray, dtype

# NumPy must be initialized
import_array()

#######################################################################
# Bench for calling special methods
#######################################################################

cdef class myarray:
  """Class that implements some added features from a ndarray object.

  It wraps an existing array and serves ``len()`` and integer indexing
  along the first axis directly from the array's data pointer.
  """
  cdef long nslots        # len() of the wrapped array, cached at init time
  cdef npy_intp stride    # strides[0] of the wrapped array, in bytes
  cdef void *mydata       # data pointer of the wrapped array
  cdef ndarray myarray    # reference to the wrapped array itself

  def __init__(self, nparray):
    """Cache the length, data pointer and first stride of `nparray`.

    NOTE(review): the ``<ndarray>`` cast is not type-checked, so `nparray`
    is assumed to really be a NumPy array -- confirm at the call sites.
    """
    self.nslots = len(nparray)
    self.myarray = <ndarray>nparray
    self.mydata = <void *>self.myarray.data
    self.stride = self.myarray.strides[0]

  def __len__(self):
    """Return the length cached in `__init__`."""
    return self.nslots

  def __getitem__(self, long key):
    """Return the element located `key` strides after the data pointer.

    No bounds checking is done and negative keys are not wrapped: `key`
    is multiplied by the stride and used as a raw byte offset.
    """
    cdef long offset
    offset = <long>(key * self.stride)
    return PyArray_GETITEM(self.myarray, self.mydata + offset)


########################################################################
# Bench for creating NumPy objects
########################################################################

def empty1(shape, dtype_, niter):
  """Create an empty array `niter` times through the Python-level call
  ``numpy.empty(shape, dtype_)``."""
  cdef int i
  cdef ndarray myarray

  for i from 0 <= i < niter:
    myarray = <ndarray>numpy.empty(shape, dtype_)


def empty2(shape, dtype dtype_, niter):
  """Create an empty array `niter` times through the C-level call
  ``PyArray_EMPTY``.

  `shape` is a sequence of dimensions and `dtype_` a ``numpy.dtype``
  instance whose ``type_num`` selects the element type.  Raises
  MemoryError if the temporary dimensions buffer cannot be allocated.
  """
  cdef int i, m, npy_type
  cdef npy_intp *dims
  cdef ndarray myarray

  npy_type = dtype_.type_num
  m = len(shape)
  # Each slot holds an npy_intp, so the buffer must be sized with
  # sizeof(npy_intp).  Sizing it with the `int` variable npy_type would
  # under-allocate wherever npy_intp is wider than int.
  dims = <npy_intp *>malloc(m * sizeof(npy_intp))
  # malloc(0) may legitimately return NULL, hence the `m > 0` guard.
  if dims == NULL and m > 0:
    raise MemoryError
  try:
    for i from 0 <= i < m:
      dims[i] = shape[i]
    for i from 0 <= i < niter:
      myarray = PyArray_EMPTY(m, dims, npy_type, 0)
  finally:
    # Release the buffer even if converting `shape` or creating the array
    # raised an exception.
    free(<void *>dims)


## Local Variables:
## mode: python
## py-indent-offset: 2
## tab-width: 2
## fill-column: 78
## End:
from time import time import numpy from pyrex_bench import myarray, empty1, empty2 N = 1000*1000 rnd = numpy.random.rand(N) niter = 100*1000 print "******************** NumPy times *********************************" t1 = time(); l = 0. for i in xrange(niter): l += len(rnd) print "time for __len__ (numpy)-->", round(time()-t1,3) t1 = time(); s = 0 for i in xrange(niter): s += rnd[i] print "sum (numpy)-->", s print "time for __getitem__ (numpy)-->", round(time()-t1,3) print "******************** Pyrex times *********************************" ma = myarray(rnd) t1 = time(); l = 0. for i in xrange(niter): l += len(rnd) print "time for __len__ (pyrex)-->", round(time()-t1,3) t1 = time(); s = 0 for i in xrange(niter): s += ma[i] print "sum (pyrex)-->", s print "time for __getitem__ (pyrex)-->", round(time()-t1,3) print "********* Comparing NumPy creation times (python and C) **********" t1 = time() empty1((N,), numpy.float64, niter) print "time for creating an empty array (python)-->", round(time()-t1,3) t1 = time() empty2((N,), numpy.dtype(numpy.float64), niter) print "time for creating an empty array (C)-->", round(time()-t1,3)
#!/usr/bin/env python
"""Build script for the ``pyrex_bench`` extension module."""

from distutils.core import setup
from distutils.extension import Extension
from Pyrex.Distutils import build_ext
import numpy

# The extension is built from a single Pyrex source file and needs the NumPy
# headers on its include path.
bench_extension = Extension(
    'pyrex_bench',
    ['pyrex_bench.pyx'],
    include_dirs=[numpy.get_include()],
)

# Hand everything over to distutils, with Pyrex's build_ext command
# replacing the default one.
setup(
    name='pyrex_bench',
    description='Small benchmark of Pyrex methods and functions',
    ext_modules=[bench_extension],
    cmdclass={'build_ext': build_ext},
)
_______________________________________________ Numpy-discussion mailing list Numpy-discussion@scipy.org http://projects.scipy.org/mailman/listinfo/numpy-discussion