def merge(pairings):
    sets = {}
    for x1, x2 in pairings:
        newset = (sets.get(x1, frozenset([x1]))
                  | sets.get(x2, frozenset([x2])))
        for i in newset:
            sets[i] = newset
    return [list(aset) for aset in set(sets.itervalues())]
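Just to illustrate what it computes on a toy input (the order of the groups, and of the elements within each group, is arbitrary since it comes from set iteration):

pairs = [(1, 2), (2, 3), (4, 5)]
print merge(pairs)   # e.g. [[1, 2, 3], [4, 5]]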
Looks good. I used it as inspiration for this new one, which takes less time on large datasets, especially those where a good number of merges are expected (at least that seems to be the pattern; on some large datasets it takes less than a quarter of the time of the one above). I'm sure it can still be improved -- yours is still faster in many cases.
def merge2(pairings):
    elements = {}   # maps each element to the index of its set in `sets`
    sets = []
    for x1, x2 in pairings:
        i = [elements.get(x1, -1), elements.get(x2, -1)]
        i.sort()
        if i[1] == -1:
            # Neither element has been seen yet: start a new set.
            i[1] = len(sets)
            sets.append(set([x1, x2]))
        elif i[0] == -1:
            # Only one element has been seen: add both to its set.
            sets[i[1]] |= set([x1, x2])
        elif i[0] == i[1]:
            # Both elements are already in the same set.
            continue
        else:
            # Elements are in different sets: merge the lower-indexed
            # set into the higher-indexed one and empty it.
            sets[i[1]] |= sets[i[0]]
            sets[i[0]].clear()
        for x in sets[i[1]]:
            elements[x] = i[1]
    return [list(s) for s in sets if s]
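A quick way to sanity-check that the two functions agree on the grouping is to canonicalise both outputs before comparing, since group and element order are arbitrary. Rough sketch (canon and pairs are just throwaway names, separate from the timing below):

def canon(groups):
    # Sort elements within each group, then the groups themselves,
    # so the two results can be compared directly.
    return sorted(sorted(g) for g in groups)

pairs = [(1, 2), (2, 3), (4, 5), (6, 6)]
assert canon(merge(pairs)) == canon(merge2(pairs))
print canon(merge2(pairs))   # [[1, 2, 3], [4, 5], [6]]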
# Comparison
import profile
import random

# Play with these values
xMax, nPairs = 1000, 5000
l = [[random.randint(0, xMax), random.randint(0, xMax)] for i in range(nPairs)]
print 'merge2:'
profile.run('merge2(l)')   # Mine

print 'merge:'
profile.run('merge(l)')    # Reinhold's

--
Brian Beck
Adventurer of the First Order
