Dear Pierre, I've attached the arraydict implementation file. You can run it and take a look at the following example on your own:
In [25]: run arraydict.py creates an arraydict with 10 elements in the current scope. Keys are the index numbers of items In [26]: a Out[26]: arraydict([(-0.51430764775177518, 0.17962503931139237), (-1.4037792804089142, 0.37263515556827359), (1.9048324627948983, 1.4155903391279885), (0.077070370958404841, -1.4284963747790793), (0.20177037521016888, 0.25023158062312373), (0.88821059412119174, 0.29415143595187959), (0.46224769848661729, -0.80670670514715426), (-0.079049832245684654, -2.5738917233959899), (-0.562854982548048, 2.0708323443154897), (-2.4176013660591691, 0.36401660943002978)], dtype=[('x', '<f8'), ('y', '<f8')]) In [27]: a.keys() Out[27]: set([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) Now select items from a, based on a cut criteria on some field (this could also be from another arraydict object) In [28]: foo = a[a.x>0] In [29]: foo Out[29]: arraydict([(1.9048324627948983, 1.4155903391279885), (0.077070370958404841, -1.4284963747790793), (0.20177037521016888, 0.25023158062312373), (0.88821059412119174, 0.29415143595187959), (0.46224769848661729, -0.80670670514715426)], dtype=[('x', '<f8'), ('y', '<f8')]) In [30]: foo.keys() Out[30]: set([2, 3, 4, 5, 6]) This works, because I modified the __getitem__ method and deal with keys of arraydict type as given back by calling a.x>0. A new arraydict is created with the selected items and keys. Here's the stuff I couldn't figure out how to deal with, making selections on slices, lists etc... In [31]: bar = a[1:6:2] The selection is correct, because it's passed to ndarray.__getitem__ In [32]: bar Out[32]: arraydict([(-1.4037792804089142, 0.37263515556827359), (0.077070370958404841, -1.4284963747790793), (0.88821059412119174, 0.29415143595187959)], dtype=[('x', '<f8'), ('y', '<f8')]) In [33]: bar.keys() Out[33]: set([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) However, the keys are not restricted :-( since bar is not a new arraydict. If I could get the indexes in the __array_finalize__ method it would be easy to set the mapping of the bar instance. Another solution could be to overwrite also the handling of slices, lists etc. in the __getitem__ method. Thanks for your help! Bernhard On May 9, 10:29 pm, Pierre GM <[EMAIL PROTECTED]> wrote:
On Wednesday 09 May 2007 08:54:37 Bernhard Voigt wrote: > I'm trying to subclass ndarray or recarray to build a record array that
has
> a dictionary with a mapping of keys to array indexes and vice versa. Bernhard, Could you send me the rest of your code ? I'd like to test a couple of
things
before committing a proper answer. _______________________________________________ Numpy-discussion mailing list [EMAIL PROTECTED]
.orghttp://projects.scipy.org/mailman/listinfo/numpy-discussion
import numpy import itertools class arraydict(numpy.ndarray): """ Extends the numpy.ndarray class by adding a dictionary that maps keys to array indexes. This is usefull if one has two different datasets with intersecting key sets but without having the same order and/or number of entries in each array and one wants to select items from one array based on a selection done on values from the other array. The __getitem__ method has been adapted that it works with keys given as slices in order to perform the following task: a = arraydict(keys1, dataset_1, dtype=[('x', float), ('y', float)]) b = arrraydict(keys2, dataset_2, dtype=[('temp', float)]) # get values from a where the temperature was high high = a[b.temp>100] """ def __new__(subtype, keys, data, dtype=None, copy=False): """ Creates a ndarray from data and maps corresponding keys to the array indexes The dtype argument is passed to the underlying array method. Params: keys - iterable of keys same length as data data - iterable of data records """ # Make sure we are working with an array if hasattr(data, 'next'): # data comes from an iterable array = numpy.fromiter(data, dtype=dtype) else: array = numpy.array(data, dtype=dtype, copy=copy) # Transform 'subarr' from an ndarray to our new subclass. array = array.view(subtype) try: # convert keys to an interator keys = iter(keys) except TypeError, e: raise TypeError('Keys cannot be converted to an iterator') # create mapping of keys to array indexes and vice versa array.__make_mapping(iter(keys)) return array def __array_finalize__(self, obj): """ Adds the mapping to the instance extracted from object Currently takes the complete mapping from object and not only those items that are in self (this needs to be fixed) """ if not hasattr(self, '__mapping'): if hasattr(obj, '_arraydict__mapping'): self.__mapping = obj._arraydict__mapping self.__reverse_mapping = obj._arraydict__reverse_mapping def __make_mapping(self, keys): """ Maps keys to array indexes, keys must be in the same order as the array data """ self.__mapping = dict((k,i) for i, k in enumerate(keys)) self.__reverse_mapping = dict((i,k) for k, i in self.__mapping.iteritems()) def __getattribute__(self, attr): """ Modified that field access as in recordarrays is possible, where a['x'] is the same as a.x """ try: if attr in numpy.ndarray.__getattribute__(self, 'dtype').fields.keys(): return self.__getitem__(attr) except AttributeError: pass return numpy.ndarray.__getattribute__(self, attr) def __getitem__(self, key): # in order to handle a[a.x>0] check # wethether the key is an arraydict of bool if isinstance(key, arraydict) and key.dtype == bool: # get the keys, make selection, build new array # the new array has the correct mapping, it is # based on the keys corresponding to the bool mask # given by the key arraydict return self.select(self.keys(key)) else: # this works for slices and other index specifications # however, the resulting arraydict has not a restricted mapping return numpy.ndarray.__getitem__(self, key) def keys(self, selection=None): """ Returns a set of keys corresponding to the given selection if selection is None all keys are returned selection can be a single integer or an iteralbe of ints or an iterable of boolean of the same length as this arraydict, masking the items to select """ if selection is None: return set(self.__mapping.iterkeys()) try: selection = iter(selection) except TypeError, e: # single item requested, must be an int if isinstance(selection, (int, long)): return set(self.__reverse_mapping[selection]) else: raise TypeError('selection must be an integer or a list of intergers or boolean values') # peek into the iterator for the type of items first = selection.next() type_ = type(first) # chain the first item back to the front of the selection selection = itertools.chain((first,), selection) # if the selection is a list of indexes, items must be int typed if type_ in (int, long, numpy.int32, numpy.int64): return set(self.__reverse_mapping[i] for i in selection) # if the selection is a mask, items must be boolean elif type_ in (bool, numpy.bool, numpy.bool_): selection = [s for s in selection] # masks must have the same length as the arraydict if len(selection) != len(self): raise Error('boolean selection not the same length as arraydict') return set(self.__reverse_mapping[i] for i, predicate in enumerate(selection) if predicate == True) else: raise TypeError('selection list items must have integer or boolean values, type is %s' % type_) def select(self, keys): """ Returns a arraydict with items from this arraydict specified by keys """ keys = sorted(keys) return arraydict(keys, (self.__getitem__(self.__mapping[k]) for k in keys), self.dtype, copy=False) def test(n=10): keys = range(n) data = ((a,b) for a, b in zip(numpy.random.normal(size=n), numpy.random.normal(size=n))) a= arraydict(keys, data, dtype=[('x', float), ('y', float)]) return a if __name__ == '__main__': a = test()
_______________________________________________ Numpy-discussion mailing list Numpy-discussion@scipy.org http://projects.scipy.org/mailman/listinfo/numpy-discussion