# Tested on Python 2.7.
import urllib2
import re
from collections import Counter
def get_data(url):
    """Fetch *url* and return the response body converted to lower case.

    Args:
        url: Address to download; passed unchanged to ``urllib2.urlopen``.

    Returns:
        The complete response body with ``lower()`` applied.

    Raises:
        urllib2.URLError: Propagated from ``urllib2.urlopen`` when the
            request fails.
    """
    # Function-scope import so this block does not depend on a new
    # file-level import.
    from contextlib import closing

    # On Python 2.7 the object returned by urlopen() is not a context
    # manager; closing() guarantees the connection is released even when
    # read() raises.
    with closing(urllib2.urlopen(url)) as response:
        return response.read().lower()
def analyse(text, n=1):
    """Print the *n* most common words in *text* together with their counts.

    A word is a maximal run of regex word characters. The output is a
    ``words<TAB>times`` header followed by one ``word<TAB>count`` line per
    word, most frequent first.

    Args:
        text: String to scan for words.
        n: Number of entries to show (default 1).
    """
    # findall() yields only real words. Splitting on r'\W+' instead leaves
    # an empty string at either end whenever the text starts or ends with
    # punctuation or whitespace, and that empty string would then be counted
    # as a word.
    words = re.findall(r'\w+', text)

    print('words\ttimes')
    for word, count in Counter(words).most_common(n):
        print('%s\t%d' % (word, count))
def main():
    """Download the text at the hard-coded alice30.txt URL and print its
    10 most common words via ``analyse``."""
    # Keep the URL as a single unbroken string literal.
    url = 'http://www.umich.edu/~umfandsf/other/ebooks/alice30.txt'
    analyse(get_data(url), 10)
# Run the analysis only when this file is executed as a script, so that
# importing the module does not trigger a download.
if __name__ == '__main__':
    main()
# Result:
# words times
# the 1642
# and 872
# to 729
# a 632
# it 595
# she 553
# i 543
# of 514
# said 462
# you 411