from scipy import c_, arange
from scipy.io import read_array, write_array
import pickle, csv

class dbase:
    """
    A simple data-frame that reads and writes csv/pickle files with variable names.

    Columns in the data can be accessed using x.get('a','c') where 'a' and 'c'
    are variable names.

    Attributes set by load():
        varnm -- dictionary mapping each variable name to its column index
        data  -- the data itself; get() indexes it as data[:, column], so it
                 is expected to be a 2-D array with one variable per column
    """

    def __init__(self, f):
        """
        Initializing the dbase class. Loading file f (a file name ending in
        '.csv' or '.pickle').
        """
        self.load(f)

    def load(self, f):
        """
        Loading data from a csv file or a pickle of the dbase class.

        f is a file name. For a csv file the first line supplies the variable
        names and the rest is read with read_array. For a pickle file the
        varnm and data attributes are copied from the unpickled object.

        Raises ValueError when the extension is neither 'csv' nor 'pickle'.
        """
        fext = self.__ext(f)
        if fext == 'csv':
            with open(f, 'r') as fh:
                self.varnm = self.vardic(fh)
            # A fresh handle is used so read_array sees the file from its
            # first line; lines=(1,-1) presumably skips the header row --
            # confirm against the read_array documentation.
            with open(f, 'r') as fh:
                self.data = read_array(fh, lines=(1, -1))
        elif fext == 'pickle':
            # NOTE(security): unpickling can execute arbitrary code, so only
            # load pickle files that come from a trusted source.
            # Pickle streams are bytes, hence binary mode.
            with open(f, 'rb') as fh:
                a = pickle.load(fh)
            self.varnm = a.varnm
            self.data = a.data
        else:
            raise ValueError('This class only works on csv and pickle files')

    def dump(self, f):
        """
        Dumping the instance of the class into a csv or pickle file.

        f is a file name. For csv the variable names are written as the
        header row, followed by one row per row of self.data. For pickle the
        whole instance is pickled.

        Raises ValueError when the extension is neither 'csv' nor 'pickle'.
        """
        fext = self.__ext(f)
        if fext == 'csv':
            # Order the names by their column index: the dictionary's own key
            # order is not guaranteed to match the column order of the data.
            header = sorted(self.varnm, key=self.varnm.get)
            with open(f, 'w') as fh:
                writer = csv.writer(fh)
                writer.writerow(header)
                writer.writerows(self.data)
        elif fext == 'pickle':
            with open(f, 'wb') as fh:
                pickle.dump(self, fh)
        else:
            raise ValueError('This class only outputs csv and pickle files')

    def __ext(self, f):
        """
        Finding the file extension of the filename passed to dbase.

        Returns the text after the last '.', stripped of white space and
        lower-cased so that e.g. 'DATA.CSV' is recognised as csv.
        """
        return f.split('.')[-1].strip().lower()

    def vardic(self, f):
        """
        Making a dictionary with variable names and indices.

        f is an open file object positioned at the header line. Only that one
        line is read; it is split on commas and each name, stripped of leading
        and trailing white space, is mapped to its position in the line,
        which is also the index of its column in the data.
        """
        dic = {}
        for j, name in enumerate(f.readline().split(',')):
            dic[name.strip()] = j
        return dic

    def get(self, *var):
        """
        Selecting columns based on variable labels. Assumes data are in columns.

        With one label the matching column of self.data is returned on its
        own; with several labels the columns are joined side by side in the
        order requested. Raises KeyError for an unknown label and IndexError
        when called without any label.
        """
        a = self.data[:, self.varnm[var[0]]]    # the column for the 1st label

        for name in var[1:]:
            # concatenate column-wise, along the last axis
            a = c_[a, self.data[:, self.varnm[name]]]

        return a

if __name__ == '__main__':
    # Demo: writes a small csv file, loads it into a dbase instance, round-trips
    # the instance through a pickle file, and finally dumps it to a second csv.
    # The files data.csv, data.pickle and data_dump.csv are created in the
    # current working directory.

    # creating simulated data and variable labels
    varnm = "a, b, c\n"                         # variable labels
    data = arange(15).reshape(5, 3)             # the data array

    # the header is written and closed first so it is on disk before the
    # data rows are appended
    with open('data.csv', 'w') as fh:
        fh.write(varnm)
    with open('data.csv', 'a') as fh:
        write_array(fh, data, separator=', ', linesep='\n')

    # loading the data from the csv file and dumping the dbase class instance to a pickle file
    a = dbase("data.csv")
    a.dump("data.pickle")

    # loading the object from the pickle file
    print("\nLoading the dbase object from a pickle file\n")

    b = dbase("data.pickle")

    # single-argument print calls give the same output under the Python 2
    # print statement and the Python 3 print function
    print("Data from dbase class\n%s" % (b.data,))
    print("\nVariable names from dbase class\n%s" % (b.varnm,))
    print("\nTwo columns selected using variable names\n%s" % (b.get('a', 'c'),))

    # dump() returns nothing, so it is called on its own and not passed to print
    print("\nSaving data and variable names to a different csv file\n")
    b.dump("data_dump.csv")
