David Cournapeau wrote:
Hi,

When trying to speed up some matplotlib routines with the matplotlib dev team, I noticed that numpy.clip is pretty slow: clip(data, m, M) is slower than a direct numpy implementation (that is, data[data<m] = m; data[data>M] = M; return data.copy()). My understanding is that both versions do the same thing, right?

Below is a small script which shows the difference (twice as slow for an 8000x256 array on my workstation):

I think there was a bug in your clip2_bench that was making it artificially fast. Attached is a script that I think gives a fairer comparison, in which clip1 and clip2 take nearly identical time, and which includes a third version using putmask that is faster than either of the others:

         15 function calls in 6.450 CPU seconds

   Ordered by: cumulative time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.004    0.004    6.450    6.450 cliptest.py:10(bench_clip)
        1    2.302    2.302    2.302    2.302 cliptest.py:19(clip2_bench)
        1    0.013    0.013    2.280    2.280 cliptest.py:15(clip1_bench)
       10    2.267    0.227    2.267    0.227 /usr/local/lib/python2.4/site-packages/numpy/core/fromnumeric.py:357(clip)
        1    1.498    1.498    1.498    1.498 cliptest.py:25(clip3_bench)
        1    0.366    0.366    0.366    0.366 cliptest.py:6(generate_data_2d)
        0    0.000             0.000          profile:0(profiler)

Eric


import numpy as N

#==========================
# To benchmark imshow alone
#==========================
def generate_data_2d(fr, nwin, hop, len):
    """Return an ``(nframes, nwin)`` array of standard-normal samples.

    ``nframes`` is ``fr / hop * len`` truncated to an integer.  The
    benchmark in this script calls ``generate_data_2d(8000, 256, 128, 120)``,
    which yields a 7500 x 256 array.

    Note: the parameter name ``len`` shadows the builtin inside this
    function; it is kept unchanged so existing callers keep working.
    """
    # The leading 1.0 forces true division for integer arguments.  randn()
    # needs integer dimensions, so truncate explicitly rather than handing
    # numpy a float shape (rejected by current numpy releases).
    nframes = int(1.0 * fr / hop * len)
    return N.random.randn(nframes, nwin)

def bench_clip():
    """Run the two clipping variants ten times each on one random array.

    Nothing is returned; the function exists to be run under a profiler so
    that the time spent in ``clip1_bench`` and ``clip2_bench`` can be
    compared.
    """
    # Lower and upper clipping bounds shared by both nested benchmarks.
    m   = -1.
    M   = 1.
    # 2 minutes (120 sec) of sounds @ 8 kHz with 256 samples with 50 % overlap
    data    = generate_data_2d(8000, 256, 128, 120)

    def clip1_bench(data, niter):
        """Call N.clip(data, m, M) ``niter`` times, discarding the result."""
        for i in range(niter):
            blop    = N.clip(data, m, M)
    def clip2_bench(data, niter):
        """Clip with boolean-mask assignment ``niter`` times, then copy."""
        for i in range(niter):
            # NOTE(review): both assignments write into ``data`` itself, so
            # every iteration after the first works on values that were
            # already modified by the previous pass.
            data[data<m]    = m
            # NOTE(review): this compares with ``<``; the accompanying
            # message describes the upper clip as ``data[data>M] = M``.
            # As written, every element below M is raised to M.
            data[data<M]    = M
            blop    = data.copy()

    # clip1 runs first, so it still sees the unmodified random data.
    clip1_bench(data, 10)
    clip2_bench(data, 10)

if __name__ == '__main__':
    # Profile bench_clip() with hotshot and report the 20 most expensive
    # entries ordered by cumulative time.
    import hotshot
    import hotshot.stats
    profile_file    = 'clip.prof'
    prof    = hotshot.Profile(profile_file, lineevents=1)
    try:
        prof.runcall(bench_clip)
    finally:
        # Close the log file even if the benchmark raises, and before the
        # stats loader reads the file back.
        prof.close()
    p = hotshot.stats.load(profile_file)
    # print_stats() writes the report itself; printing its return value
    # would only append the repr of the Stats object.
    p.sort_stats('cumulative').print_stats(20)

    cheers,

    David
_______________________________________________
Numpy-discussion mailing list
[email protected]
http://projects.scipy.org/mailman/listinfo/numpy-discussion

import numpy as N

#==========================
# To benchmark imshow alone
#==========================
def generate_data_2d(fr, nwin, hop, len):
    """Return an ``(nframes, nwin)`` array of standard-normal samples.

    ``nframes`` is ``fr / hop * len`` truncated to an integer.  The
    benchmark in this script calls ``generate_data_2d(8000, 256, 128, 120)``,
    which yields a 7500 x 256 array.

    Note: the parameter name ``len`` shadows the builtin inside this
    function; it is kept unchanged so existing callers keep working.
    """
    # The leading 1.0 forces true division for integer arguments.  randn()
    # needs integer dimensions, so truncate explicitly rather than handing
    # numpy a float shape (rejected by current numpy releases).
    nframes = int(1.0 * fr / hop * len)
    return N.random.randn(nframes, nwin)

def bench_clip():
    """Time three ways of clipping one random array to [-1, 1].

    Each nested benchmark is run for ten iterations on the same input and
    leaves that input untouched.  Nothing is returned; the function is
    meant to be executed under a profiler, which attributes time to the
    nested functions by name.
    """
    lower = -1.
    upper = 1.
    data = generate_data_2d(8000, 256, 128, 120)

    def clip1_bench(data, niter):
        """Clip with N.clip, which hands back a new array each time."""
        for _ in range(niter):
            clipped = N.clip(data, lower, upper)

    def clip2_bench(data, niter):
        """Copy the input, then clip the copy via boolean-mask assignment."""
        for _ in range(niter):
            work = data.copy()
            work[work < lower] = lower
            work[work > upper] = upper

    def clip3_bench(data, niter):
        """Copy the input, then clip the copy via N.putmask."""
        for _ in range(niter):
            work = data.copy()
            N.putmask(work, work < lower, lower)
            N.putmask(work, work > upper, upper)

    # Same order as the report expects: clip1, clip2, clip3.
    for bench in (clip1_bench, clip2_bench, clip3_bench):
        bench(data, 10)

if __name__ == '__main__':
    # Profile bench_clip() with hotshot and report the 30 most expensive
    # entries ordered by cumulative time.
    import hotshot
    import hotshot.stats
    profile_file    = 'clip.prof'
    prof    = hotshot.Profile(profile_file, lineevents=1)
    try:
        prof.runcall(bench_clip)
    finally:
        # Close the log file even if the benchmark raises, and before the
        # stats loader reads the file back.
        prof.close()
    p = hotshot.stats.load(profile_file)
    # print_stats() writes the report itself; printing its return value
    # would only append the repr of the Stats object.
    p.sort_stats('cumulative').print_stats(30)


_______________________________________________
Numpy-discussion mailing list
[email protected]
http://projects.scipy.org/mailman/listinfo/numpy-discussion

Reply via email to