On Thursday, June 22, 2017 at 12:16:28 PM UTC+1, [email protected] wrote:
> I want to write a common file in which It can add the frequency by adding
> multiple csv file and if the same words are repeated in python then it should
> add the frequency in the common file can any one help me please
>
>
> import re
> import operator
> import string
>
> class words:
> def __init__(self,fh):
> self.fh = fh
> def read(self):
> for line in fh:
> yield line.split()
>
> if __name__ == "__main__":
> frequency = {}
> document_text = open('data_analysis.csv', 'r')
> common1_file = open("common_file1.csv", "r")
>
> text_string = document_text.read().lower()
> match_pattern = re.findall(r'\b[a-z]{3,15}\b', text_string)
>
> text_string_one = common1_file.read().lower()
> match_pattern_one = re.findall(r'\b[a-z]{3,15}\b', text_string_one)
> #print("match_pattern"+(str(match_pattern)))
> for word in match_pattern:
> for word1 in match_pattern_one:
> count = frequency.get(word,0)
> count1 = frequency.get(word1,0)
> if word1 == word:
> frequency[word] = count + count1
> else:
> frequency[word] = count
>
>
> frequency_list = frequency.keys()
> text_file = open("common_file1.csv", "w")
> for words in frequency_list:
> data = (words, frequency[words])
> print (data)
> #text_file = open("common_file1.csv", "w")
> #for i in data:
> #store_fre = (str(data)+"\n")
> text_file.write(str(data)+"\n")
>
>
> text_file.close()
>
>
> this is my code written by me til now but not getting satisfied results
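The dictionary 'frequency' is only ever updated with values of 0: it starts
empty, so both frequency.get() calls return the default of 0, and nothing
in the loop ever increments a count.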
If the aim is to get a count of occurrences for each word
where the word exists in both input files, you could replace this:
for word in match_pattern:
for word1 in match_pattern_one:
count = frequency.get(word,0)
count1 = frequency.get(word1,0)
if word1 == word:
frequency[word] = count + count1
else:
frequency[word] = count
with this:
all_words = match_pattern + match_pattern_one
word_set = set(match_pattern) & set(match_pattern_one)
while word_set:
word = word_set.pop()
count = all_words.count(word)
frequency[word] = count
Other observations:
- Reading from and writing to the csv files is not utilising the csv format
- The regex may be too restrictive (it only matches runs of 3 to 15 letters
a-z), so not all expected words may be extracted
- The output is written to one of the input files, overwriting the original
content of the input file
--
https://mail.python.org/mailman/listinfo/python-list