"""Print the most frequent words of ``new.txt`` after jieba segmentation.

Pipeline: read the file, blank out digits and punctuation, segment the
text with ``jieba.lcut``, drop words that occur only once or that are in
the stop-word list, then print the top entries as ``(word, count)`` tuples.
"""
from collections import Counter
from typing import Iterable, List, Tuple

# File the script analyses (read as UTF-8).
INPUT_PATH = 'new.txt'

# How many of the most frequent words are printed.
TOP_N = 19

# Digits and punctuation that are replaced by a space before segmentation.
# NOTE: the CJK numeral '一' is part of this set too, so it never reaches
# the tokenizer.
STRIP_CHARS = '''1234567890一!!「」,。?、;’"',.、:()()\n‘’'''

# Particles / function words (plus plain and full-width spaces) that are
# removed from the result.
EXCLUDE = frozenset([
    '說', '有', '得', '沒', '的', '他', '了', '她', '是', '在', '—', '你',
    '走', '對', '他們', '着', '把', '不', '也', '我', '人', '而',
    '與', '就', '但是', '那', '要', '又', '想', '和', '一個', ' ', '呢',
    '很', '一點', '都', '去',
    '沒有', '個', '上', '給', '來', '還', '到', '這', '\u3000', '點', '小',
    '看',
])


def strip_punctuation(text: str, chars: str = STRIP_CHARS) -> str:
    """Return *text* with every character of *chars* replaced by a space."""
    # One pass via a translation table instead of one ``replace`` per char.
    return text.translate(str.maketrans({ch: ' ' for ch in chars}))


def rank_words(
    tokens: Iterable[str],
    exclude: Iterable[str] = EXCLUDE,
    limit: int = TOP_N,
) -> List[Tuple[str, int]]:
    """Return up to *limit* ``(word, count)`` pairs, most frequent first.

    Words that occur only once are dropped (a single occurrence is usually
    not meaningful), as is every word in *exclude*. Words with equal counts
    keep the order in which they first appeared in *tokens*.
    """
    stop_words = frozenset(exclude)
    # Counter tallies in a single pass and remembers first-seen order.
    counts = Counter(tokens)
    kept = [
        (word, count)
        for word, count in counts.items()
        if count > 1 and word not in stop_words
    ]
    # sorted() is stable (also with reverse=True), so ties stay in
    # first-seen order.
    kept.sort(key=lambda item: item[1], reverse=True)
    # Slicing instead of indexing: a short text simply yields fewer rows
    # rather than raising IndexError.
    return kept[:limit]


def main() -> None:
    """Read ``INPUT_PATH``, segment it and print the top ``TOP_N`` words."""
    # Imported here so the helpers above can be imported and tested without
    # the tokenizer installed.
    import jieba

    # The context manager closes the file even if reading fails.
    with open(INPUT_PATH, 'r', encoding='utf-8') as source:
        text = source.read()

    tokens = jieba.lcut(strip_punctuation(text))
    for entry in rank_words(tokens):
        print(entry)


if __name__ == "__main__":
    main()
運行結果:
('工會', 17)
('日', 16)
('月', 12)
('清明節', 11)
('經費', 10)
('不許', 8)
('元', 7)
('將', 7)
('上調', 6)
('節日', 6)
('假期', 6)
('規定', 5)
('基層', 5)
('號', 5)
('汽油', 5)
('每升', 4)
('福利', 4)
('標準', 4)
('發放', 4)