I wrote the code below to calculate the frequency of each word in a file, and now I need to plot the results: specifically, the 15 most frequent words as a bar graph. I really don't know how to start. Any help?
Code:
import string
def compItems(item1, item2):
    """Compare two (word, count) pairs for a word-frequency report.

    Pairs with a higher count sort first; pairs with equal counts are
    ordered by comparing their words with the normal ``<``/``>`` operators.

    Args:
        item1: A ``(word, count)`` pair.
        item2: A ``(word, count)`` pair.

    Returns:
        -1 if ``item1`` should come before ``item2``, 1 if it should come
        after, and 0 if both the counts and the words are equal.
    """
    # Unpack inside the body: tuple parameters in the signature are a
    # syntax error on Python 3, while this form works on both 2 and 3.
    w1, c1 = item1
    w2, c2 = item2
    if c1 > c2:
        return -1
    if c1 == c2:
        # Three-way comparison written without the Python-2-only cmp().
        return (w1 > w2) - (w1 < w2)
    return 1
def main():
    """Prompt for a file name and print a word-frequency report.

    The file's text is lower-cased, each listed punctuation character is
    replaced by a space, and the result is split on whitespace into words.
    Every distinct word is then printed with its count, ordered by
    ``compItems`` (highest count first, ties ordered by word). Each line
    shows the word left-justified in 10 columns followed by the count
    right-justified in 5 columns.

    Raises:
        IOError / OSError: If the named file cannot be opened or read.
    """
    # Local import so this function carries its own dependency.
    import functools

    # Single-argument print(...) produces the same output on Python 2 and 3.
    print("This program analyzes word frequency in a file")
    print("and prints a report on the n most frequent words.\n")

    # raw_input only exists on Python 2; fall back to input on Python 3.
    try:
        ask = raw_input
    except NameError:
        ask = input

    # get the sequence of words from the file
    fname = ask("File to analyze: ")
    # The with-block closes the file even if reading fails.
    with open(fname, 'r') as infile:
        text = infile.read()
    text = text.lower()
    # NOTE(review): the repeated '?' entries in this list look like they may
    # be mis-encoded '^' and '~' characters -- confirm against the intended
    # punctuation set before changing the literal.
    for ch in """!"#$%&()*+,-./:;<=>?@[\\]?_'`{|}?""":
        text = text.replace(ch, ' ')
    words = text.split()

    # construct a dictionary of word counts
    counts = {}
    for w in words:
        counts[w] = counts.get(w, 0) + 1

    # output analysis: every distinct word, most frequent first.
    # cmp_to_key adapts the two-argument comparison to a sort key, which is
    # the form accepted by both Python 2.7 and Python 3.
    items = sorted(counts.items(), key=functools.cmp_to_key(compItems))
    for item in items:
        print("%-10s%5d" % item)
# Start the interactive report only when this file is run as a script, so
# that importing it to reuse its functions does not trigger the prompt.
if __name__ == "__main__":
    main()