> Girish Sahani wrote:
>> Hi,
>>
>> There is a code in my main function which is something like:
>>
>> while prunedFinal != []:
>> prunedNew = genColocations(prunedK) ***
>> tableInstancesNew =
>> genTableInstances(prunedNew,tableInstancesK)
>> tiCountDict = tiCount(tableInstancesNew)
>> tiDict = findPI(tableInstancesNew)
>> prunedFinal = pruneTI(tiDict,pi)
>> rulesDict = genRules(prunedFinal)
>> cpDict = findCP(rulesDict)
>> prunedRulesList = pruneCP(cpDict,cp)
>> prunedK = prunedFinal
>> tableInstancesK = tableInstancesNew
>> else:
>> return prunedRulesList
>>
>> prunedK and tableInstancesK are defined in the main function.
>
> defined as what ? functions, strings, lists, classes, ... ?
PrunedK is a list that contains 2 length strings and tableInstancesK is a
dictionary,its keys are 2 length strings and values are lists of lists
>
>> Before the
>> main function, i have defined the other functions such as
>> genColocations,genTableInstances,etc. Output of genColocations is to be
>> given to the next function genTableInstances,output of this function to
>> tiCount and findPI, and so on.
>> However i am getting an error at the line marked with ***.
>
> Which error ? How do you hope us to be of any help here if you don't *at
> least* provide the full traceback ? FWIW, the canonical way to do things
> is to:
> - provide minimal *runnable* code exposing the problem
> - explain what you hoped to get
> - explain what you got instead (including full traceback)
>
> As a matter of fact, it's often the case that one solves the problem
> when working on the first point !-)
>
> (snip)
Ohh...I was thinking that posting the whole code would not be a good idea.
The error i get above is:
line 266, in colocationMiner
prunedNew = genColocations(prunedK)
Anyways, i've attached the file colocations.py. The expected output is a
List of rules (prunedRulesList).These rules are themselves lists.e.g
['ab','c'] denotes the rule ab=>c.
Please do have a look if you have time :).
>
> --
> bruno desthuilliers
> python -c "print '@'.join(['.'.join([w[::-1] for w in p.split('.')]) for
> p in '[EMAIL PROTECTED]'.split('@')])"
> --
> http://mail.python.org/mailman/listinfo/python-list
>
#convert the input file into a dictionary (global ids mapped to feature type)
and a list of global id pairs
def get_colocations(filename):
lines = open(filename).read().split("\n")
colocnDict = {}
for line in lines:
n1, b1, n2, b2 = line.split(",")
n1 = int(n1)
n2 = int(n2)
a1 = b1.strip("'")
a2 = b2.strip("'")
colocnDict[n1] = a1
colocnDict[n2] = a2
return colocnDict
#get pairs of feature ids which are colocated
def getPairs(filename):
lines = open(filename).read().split("\n")
pairList = []
for line in lines:
n1, b1, n2, b2 = line.split(",")
pairList.append([n1, n2])
return pairList
#count number of occurences of each feature in the feature list and store in
featueCountDict
def getFeatureCount():
colocnDict = get_colocations("colocations.txt")
featureList= colocnDict.values()
featureListUnique = []
[featureListUnique.append(word) for word in featureList if not
featureListUnique.count(word)]
featureCountDict = {}
for feature in featureListUnique:
featureCountDict[feature] = featureList.count(feature)
return featureCountDict
def k2k1(string1, string2):
for c in string1:
string2 = string2.replace(c,"",1)
if len(string2) == 1:
string1 += string2
else:
pass
return string1
def dictInvert(d):
dictInv = {}
for k, v in d.iteritems():
keys = dictInv.setdefault(v, [])
keys.append(k)
return dictInv
#Generate candidate co-locations of size k+1 from size k
#get lower level subsets and prune it by antimonotone property
def genColocations(prunedK):
prunedNew = substringList = []
for string1 in prunedK:
for string2 in prunedK:
k = len(string2)
if string1 != string2:
string1 = k2k1(string1, string2)
if len(string1) == k+1:
prunedNew.append(string1)
for s in prunedNew:
substrings = [s[:i]+s[i+1:] for i in range(len(s))]
for string in substrings:
if string not in prunedK:
prunedNew.remove(s)
continue
continue
#tableInstancesNew is a dictionary with keys as k level colocations and values
as table instances
def genTableInstances(prunedNew,tableInstancesK):
colocnDict = get_colocations("colocations.txt")
tableInstancesNew = {}
for s in p:
substring1 = s[:len(s)-1]
substring2 = s[:len(s)-2]+s[len(s)-1:] #get 2 substrings
list1 = tableInstancesK(substring1)
list2 = tableInstancesK(substring2)
stringList1 = list(substring1)
stringList2 = list(substring2)
inter1 = filter(lambda x:x in stringList1,stringList2)
indexList1 = indexList2 = []
i=j=0
for char in inter1: #get positions of char. of inter1 in string1/2,
then corr. positions in list1/2
i = substring1.index(char)
j = substring2.index(char)
indexList1.append(i)
indexList2.append(j)
continue
char1 = filter(lambda x:x not in inter1,stringList1)
char2 = filter(lambda x:x not in inter1,stringList2)
charList = []
charList.append(char1[0])
charList.append(char2[0])
pairList = getPairs("colocations.txt")
for ti1 in list1:
for ti2 in list2:
for i in range(0,len(indexList1)-1):
index1 = indexList1(i)
index2 = indexList2(j)
if j < len(indexList2)-1 & ti1[index1] == ti2[index2]:
i+=1
j+=1
continue
elif i == len(indexList2)-1 & j == len(indexList2)-1 &
ti1[index1] == ti2[index2]:
if charList in pairList:
k = string2.index(char2[0])
instance = ti2(k)
tiNew = ti1.append(instance)
tiNewList = []
tiNewList.append(tiNew)
else:
break
else:
break
tableInstancesNew[s] = tiNewList
continue
return tableInstancesNew
#Storing tableInstances and their corresponding counts
def tiCount(tableInstancesNew):
for k,v in tableInstancesNew.iteritems():
tiCountdict[k] = len(v)
return tiCountDict
#finding participation index of a table instance
#pr=count(no. of distinct instances present in the colocation) /count(feature)
def findPI(tableInstancesNew):
pr = prList = []
i=j=k=0
colocnDict = get_colocations("colocations.txt")
for tableInstance in tableInstancesNew.values():
length = len(tableInstance(0))-1
for i in range(0,length,1):
for rowInstance in tableInstance:
prList.append(rowInstance[i:i+1])
continue
prListUnique = []
[prListUnique.append(word) for word in prList if not
prListUnique.count(word)]
featureCountDict = getFeatureCount()
if rowInstance[i] in featureCountDict.keys():
featureCount = featureCountDict(rowInstance[i])
pr[j] = round(len(prListUnique)*1.0 / featureCount,2)
prList = []
j+=1
i+=1
continue
pi[k] = min(pr)
k+=1
continue
tiDict = {}
tiDict = dict(zip(tableInstancesNew.keys(), pr)) #check order
return tiDict
#Pruning based on participation index
def pruneTI(tiDict,pi):
for ti in tiDict:
if tiDict[ti] >= pi:
prunedDict[ti] = tiDict[ti]
prunedFinal = prunedDict.keys()
return prunedFinal
#generates rules list of a colocation. e.g.'abc' gives ['ab','c'] which denotes
the rule ab => c
def genRulesList(colocation):
rulesList= [ ]
length = len(colocation)
i=0
colocnList = []
colocnList.append(colocation)
while colocNew != colocation:
colocNew = colocation + colocation[0]
colocNew = colocNew[1:len(colocNew)]
colocnList.append(colocNew)
continue
for colocn in colocnList:
for j in range(0,length - 1,1):
rule1 = [colocn[i:i+j],colocn[i+j:length]]
rule2 = [colocn[i+j:length],colocn[i:i+j]]
rulesList.append(rule1)
rulesList.append(rule2)
continue
continue
return rulesList
#generate the colocation rules,rulesDict maps each colocation to its rules list
def genRules(prunedFinal):
rulesDict = {}
for colocation in prunedFinal:
rulesDict[colocation] = genRulesList(colocation)
return rulesDict
#finds CP of a rule list and maps each rule to its cp
def findCP(rulesDict):
cpDict = {}
tiCountDict = tiCount(tableInstancesNew)
for rulesList in rulesDict.values():
for rule in rulesList:
string = " ".join(rule)
list1 = tiCountDict[string]
count1 = len(list1)
colocn = rule[0]
list2 = tiCountDict[colocn]
count2 = len(list2)
cpDict[rule] = round(count1*1.0 / count2,2)
return cpDict
#prunes the rules on basis of cp
def pruneCP(cpDict,cp):
for rule in rulesDict:
if rulesDict[subset] >= cp:
prunedRulesDict[ti] = rulesDict[ti]
prunedRulesFinal = prunedRulesDict.keys()
prunedRulesList = []
prunedRulesList.append(prunedRulesFinal)
return prunedRulesList
#generate 2-sized tableInstancesK and prunedK = prunedFinal from colocnDict
def colocationMiner(pi,cp):
colocationSet = {}
featureCountDict = getFeatureCount()
tiCountDict = featureCountDict
featureList = featureCountDict.keys()
prunedK = []
for i in range(0,len(featureList) - 1,1):
for k in range(1,len(featureList),1):
if i+k <= len(featureList)-1:
colocn = featureList[i] + featureList[i+k]
prunedK.append(colocn)
prunedFinal = prunedK
colocnDict = get_colocations("colocations.txt")
tiDict1 = dictInvert(colocnDict)
tiDict2 = {}
for k1,v1 in tiDict1.iteritems():
for k2,v2 in tiDict1.iteritems():
if k1 != k2 and not tiDict2.has_key(k2 + k1):
tiDict2[k1 + k2] = [v1] + [v2]
l = tiDict2.values()
for value in l:
l1 = value[0]
l2 = value[1]
l3 = l4 = l5 = []
for i in range(0,len(l1),1):
for j in range(0,len(l2),1):
l3.append([l1[i],l2[j]])
l4.append(l3)
pairList = getPairs("colocations.txt")
for element in l4:
for pair in element:
if pair not in pairList:
element.remove(pair)
l5.append(element)
tableInstancesK = dict(zip(tiDict2.keys(), l5)) #check order
for k,v in tableInstancesK.iteritems():
tiCountDict[k] = len(v)
while prunedFinal != []:
prunedNew = genColocations(prunedK)
tableInstancesNew = genTableInstances(prunedNew,tableInstancesK)
tiCountDict = tiCount(tableInstancesNew)
tiDict = findPI(tableInstancesNew)
prunedFinal = pruneTI(tiDict,pi)
rulesDict = genRules(prunedFinal)
cpDict = findCP(rulesDict)
prunedRulesList = pruneCP(cpDict,cp)
prunedK = prunedFinal
tableInstancesK = tableInstancesNew
else:
return prunedRulesList
print "List of colocation rules is ",prunedRulesList
print tiCountDict
--
http://mail.python.org/mailman/listinfo/python-list