#!/usr/bin/env python


"""
some test code to test performance of the textreader code
"""

# Benchmark configuration.
# test_filename: data file read by every benchmark (created on first run).
# num_rows:      total number of lines in the data file.
# num_chunks:    number of pieces the chunked readers split the file into.
test_filename = "test_big_file.txt"
num_rows = 1000000
num_chunks = 10000
# Floor division keeps size_chunks an int (it is used as a slice bound and a
# row count) regardless of how "/" behaves for ints.
size_chunks = num_rows // num_chunks

# The chunked readers fill exactly num_chunks * size_chunks rows, so the
# chunks must tile the file with nothing left over.
if size_chunks * num_chunks != num_rows:
    raise ValueError("num_chunks must divide equally into num_rows")


import itertools
import os
import timeit

import numpy as np
import textreader

if not os.path.exists(test_filename):
    # generate a big test file if it's not already there
    print("writing a test file")
    # The with-block closes the file even if a write fails part-way, and
    # writelines() over a lazy repeat() avoids num_rows separate write() calls
    # without building the whole content in memory.
    with open(test_filename, 'wt') as f:
        f.writelines(itertools.repeat("123.456, 678.901\n", num_rows))
    # don't leave the (closed) handle behind in the module namespace
    del f

# Module-level handle on the test data file.
# NOTE(review): nothing visible in this file reads from test_file (the
# benchmarks below open the file themselves by name) and it is never closed --
# confirm whether anything else depends on it before removing.
test_file = file(test_filename)

def read_py(filename):
    """Parse a comma-separated text file of numbers using plain Python.

    Every line is split on "," and each field is converted with float();
    the rows are gathered in a list and converted to an array in one step
    at the end.

    :param filename: path of the text file to read
    :returns: numpy float64 array built from one list of floats per line
    :raises ValueError: if a field cannot be converted by float()
                        (this includes blank lines)
    """
    # the with-block guarantees the file is closed, even if parsing fails
    with open(filename) as f:
        rows = [[float(field) for field in line.split(",")] for line in f]
    return np.array(rows, dtype=np.float64)

def read_py_chunks(filename):
    """Parse the file in plain Python, one chunk of lines at a time.

    Reads num_chunks chunks of size_chunks lines each (module-level
    settings). Each chunk is parsed into a list of float rows and copied
    into its slice of a pre-allocated (num_rows, 2) float64 array.

    :param filename: path of the text file to read
    :returns: numpy float64 array of shape (num_rows, 2)
    :raises ValueError: if a field cannot be converted by float(); this also
                        happens when the file has fewer than num_rows lines,
                        because readline() then returns an empty string
    """
    result = np.empty((num_rows, 2), dtype=np.float64)
    # the with-block guarantees the file is closed, even if parsing fails
    with open(filename) as f:
        for i in range(num_chunks):
            chunk = [[float(x) for x in f.readline().split(",")]
                     for _ in range(size_chunks)]
            start = i * size_chunks
            result[start:start + size_chunks] = chunk
    return result

def read_all(filename):
    """Read the whole file with a single textreader.readrows call.

    Asks textreader.readrows for num_rows rows (module-level setting) of
    np.float64 values separated by ','.

    :param filename: path of the text file to read
    :returns: whatever textreader.readrows returns for the full file
              (presumably a float64 array with num_rows rows -- verify
              against the textreader documentation)
    """
    # the with-block guarantees the file is closed once the rows are read
    with open(filename) as f:
        return textreader.readrows(f,
                                   np.float64,
                                   delimiter=',',
                                   numrows=num_rows)

def read_chunks(filename):
    """Read the file with textreader.readrows, one chunk at a time.

    Makes num_chunks successive readrows calls on the same open file, each
    asking for size_chunks rows (module-level settings), and copies every
    result into its slice of a pre-allocated (num_rows, 2) float64 array.

    :param filename: path of the text file to read
    :returns: numpy float64 array of shape (num_rows, 2)
    """
    result = np.empty((num_rows, 2), dtype=np.float64)
    # the with-block guarantees the file is closed, even if a read fails
    with open(filename) as f:
        for i in range(num_chunks):
            start = i * size_chunks
            result[start:start + size_chunks] = textreader.readrows(
                f,
                np.float64,
                delimiter=',',
                numrows=size_chunks)
    return result

if __name__ == "__main__":
    # Time each reader num_tests times on the same file and report the
    # total elapsed time per reader.
    num_tests = 2
    # Single pre-formatted strings print identically whether print is a
    # statement or a function (two spaces match the original output).
    print("total number of rows:  %s" % num_rows)
    print("total number of chunks:  %s" % num_chunks)

    # (function to time, report template) -- run in this order
    benchmarks = [
        ("read_py", "pure python took: %f seconds"),
        ("read_py_chunks", "pure python chunks took: %f seconds"),
        ("read_all", "textreader all at once took: %f seconds"),
        ("read_chunks", "textreader in chunks took : %f seconds"),
    ]
    for func_name, report in benchmarks:
        # timeit runs the statement in its own namespace, so the function
        # and the file name have to be imported from __main__ in the setup.
        t = timeit.timeit(
            stmt="%s(test_filename)" % func_name,
            setup="from __main__ import %s, test_filename" % func_name,
            number=num_tests)
        print(report % t)

### test to see if they get the same thing
#    a1 = read_py(test_filename)
#    a2 = read_py_chunks(test_filename)
#    a3 = read_all(test_filename)
#    a4 = read_chunks(test_filename)
#
#    # note: use allclose, not exact equality, because the text reader and python's "float()" don't do exactly the same thing
#    assert np.allclose(a1, a2)
#    assert np.allclose(a1, a3)
#    assert np.allclose(a1, a4)
#