import numpy as N
import pylab, csv, cPickle

def csvconvert(col):
	"""
	Cast a column of strings to the most specific type it fits.

	The conversions are attempted in a fixed order -- integer, float,
	date-number -- and the result of the first one that does not raise
	ValueError is returned. When all three fail, col itself is returned
	unchanged.

	col must provide an astype method; load() passes in the columns of a
	numpy string array.
	"""
	# integer is the strictest cast, so it is attempted first
	try:
		return col.astype('i')
	except ValueError:
		pass

	# not all integers: try floating point
	try:
		return col.astype('f')
	except ValueError:
		pass

	# not numeric at all: see whether the values parse as dates
	try:
		return pylab.datestr2num(col)
	except ValueError:
		pass

	# nothing matched, leave the strings as they are
	return col

def load(fname,delim = ',',has_varnm = True, prn_report = True):
	"""
	Load a delimited text file into a numpy record array.

	fname      -- path of the file to read
	delim      -- field delimiter handed to the csv module
	has_varnm  -- if True the first row holds the variable names,
	              otherwise the names col1, col2, ... are generated
	prn_report -- if True print a short summary of what was loaded

	Every column is passed through csvconvert, so the fields of the
	returned recarray may have different types.
	"""
	# the with-block closes the file even when parsing fails;
	# 'rb' is the mode the Python 2 csv module expects
	with open(fname, 'rb') as f:
		reader = csv.reader(f, delimiter = delim)

		# the header goes through the csv reader too, so quoted names are
		# split and unquoted by the same rules as the data rows
		# (an empty file simply yields no names)
		if has_varnm:
			varnm = next(reader, [])

		# the remaining rows, as one array of strings
		data = N.array(list(reader))

	# transposing gives the columns; convert each to an appropriate type
	data = [csvconvert(col) for col in data.T]

	# number of columns
	cols = len(data)

	if has_varnm:
		# making sure the variable names have no leading or trailing spaces
		varnm = [name.strip() for name in varnm]
	else:
		# no variable names in the file, so create some
		varnm = ['col%s' % (i + 1) for i in range(cols)]

	# pairing each variable name with the datatype of its column
	descr = [(varnm[i], data[i].dtype) for i in range(cols)]

	# converting to a recarray
	data = N.rec.fromarrays(data, dtype=descr)

	# load report; a single parenthesised argument prints the same text
	# whether print is a statement or a function
	if prn_report:
		print("##########################################\n")
		print("Loaded file: %s\n" % fname)
		print("Nr obs: %s\n" % data.shape[0])
		print("Variables and datatypes:\n")
		for field in data.dtype.descr:
			print("Varname: %s, Type: %s, Sample: %s" % (field[0], field[1], str(data[field[0]][0:3])))
		print("\n##########################################\n")

	return data

def show_dates(dates):
	"""
	Format date-numbers as 'day month-abbreviation year' strings.

	dates is handed to pylab.num2date (presumably date-numbers such as
	those produced by pylab.datestr2num in csvconvert); every resulting
	date is rendered with the format '%d %b %y' and the strings are
	returned as a numpy array.
	"""
	labels = []
	for stamp in pylab.num2date(dates):
		labels.append(stamp.strftime('%d %b %y'))
	return N.array(labels)

if __name__ == '__main__':

	# small demonstration: write a sample csv file, then read it back

	# creating data: a header row followed by five observations;
	# col3 holds date-like strings such as '1/97'
	data = [['col1', 'col2', 'col3', 'col4', 'col5', 'col6'],
			['1','3','1/97','1.12','2.11','1.2'],
			['1','2','3/97','1.21','3.12','1.43'],
			['2','1','2/97','1.12','2.11','1.28'],
			['2','2','4/97','1.33','2.26','1.23'],
			['2','2','5/97','1.73','2.42','1.26']]

	# saving data to csv file; the with-block closes the handle even if a
	# write fails ('wb' is the mode the Python 2 csv module expects)
	with open('testdata.csv','wb') as f:
		csv.writer(f).writerows(data)

	# opening data file with variable names
	ra = load('testdata.csv')
