"""Scrape Juejin search-result titles, count their words, and draw a word cloud.

The script runs top to bottom at module level:

1. Fetch the search landing page and parse it into ``soup``.
2. Page through the search AJAX endpoint and append each returned title to
   ``finally.txt``.
3. Tokenise the accumulated text with jieba and count every token that is not
   in the ``symbol`` stop set.
4. Write the (word, count) pairs to an ``.xls`` workbook.
5. Render a masked word cloud, display it, and save it to ``c-cool.jpg``.
"""

import json
import re
import urllib
import urllib.request  # `import urllib` alone does not guarantee the submodule
from collections import Counter

import jieba
import jieba.analyse
import matplotlib.pyplot as plt
import numpy as np
import requests
import xlwt
from bs4 import BeautifulSoup
from PIL import Image, ImageSequence
from wordcloud import WordCloud

# Number of result pages requested from the AJAX endpoint (pages 0..24).
PAGE_COUNT = 25
# Maximum number of entries read from each page (the script reads indexes 0..18).
RESULTS_PER_PAGE = 19


def _fetch_page_titles(page_url, limit=RESULTS_PER_PAGE):
    """Return the titles of at most *limit* entries from one result page.

    The response body is decoded as UTF-8 and parsed with ``json.loads``.
    Network data must never be passed to ``eval``: it would execute arbitrary
    expressions and it also fails on JSON ``true``/``false``/``null``.

    Args:
        page_url: Full URL of one page of the search endpoint.
        limit: Upper bound on how many entries of the ``"d"`` list are read.

    Returns:
        A list of title strings; entries without a title are skipped.

    Raises:
        urllib.error.URLError: If the request fails.
        json.JSONDecodeError: If the body is not valid JSON.
    """
    with urllib.request.urlopen(page_url) as response:
        payload = json.loads(response.read().decode('utf-8'))
    # Slicing instead of indexing avoids IndexError on a short page.
    entries = (payload.get('d') or [])[:limit]
    return [entry['title'] for entry in entries if entry.get('title')]


def _count_words(tokens, stop_words):
    """Return ``{token: occurrences}`` for every token not in *stop_words*.

    Counting happens in a single pass; keys appear in first-occurrence order.
    """
    return {
        token: count
        for token, count in Counter(tokens).items()
        if token not in stop_words
    }


# --- 1. Landing page --------------------------------------------------------
# NOTE(review): `soup` is not used by anything else in this script as shown;
# kept so the request and the name stay available.
url = 'https://juejin.im/search?query=前端'
res = requests.get(url)
res.encoding = "utf-8"
soup = BeautifulSoup(res.text, "html.parser")

# --- 2. Collect titles ------------------------------------------------------
ajaxUrlBegin = 'https://search-merger-ms.juejin.im/v1/search?query=%E5%89%8D%E7%AB%AF&page='
ajaxUrlLast = '&raw_result=false&src=web'

# Append mode: titles from earlier runs stay in the file and are counted again.
# Titles are written back to back with no separator, exactly as before.
with open('finally.txt', 'a', encoding='utf-8') as titles_file:
    for page in range(PAGE_COUNT):
        ajaxUrl = ajaxUrlBegin + str(page) + ajaxUrlLast
        for title in _fetch_page_titles(ajaxUrl):
            print(title + '\n')
            titles_file.write(title)

# --- 3. Tokenise and count --------------------------------------------------
with open('finally.txt', 'r', encoding='utf-8') as titles_file:
    text = titles_file.read()

stringList = list(jieba.cut(text))
# Punctuation and filler words excluded from the statistics.
symbol = {"/", "(", ")", " ", ";", "!", "、", ":","+","?"," ",")","(","?",",","之","你","了","嗎","】","【"}
title_dict = _count_words(stringList, symbol)
print(title_dict)

# --- 4. Export counts to Excel ----------------------------------------------
di = title_dict
wbk = xlwt.Workbook(encoding='utf-8')
sheet = wbk.add_sheet("wordCount")
for row, (word, count) in enumerate(di.items()):
    sheet.write(row, 0, label=word)   # column 0: the word
    sheet.write(row, 1, label=count)  # column 1: its occurrence count
wbk.save('前端數據.xls')

# --- 5. Word cloud ----------------------------------------------------------
font = r'C:\Windows\Fonts\simhei.ttf'  # Windows-only font path
# NOTE(review): only the distinct words are passed on, so the counts above do
# not reach the word cloud; confirm whether frequency weighting was intended.
content = ' '.join(title_dict)
image = np.array(Image.open('test.jpg'))  # mask image that shapes the cloud
wordcloud = WordCloud(
    background_color='white',
    font_path=font,
    mask=image,
    width=1000,
    height=860,
    margin=2,
).generate(content)
plt.imshow(wordcloud)
plt.axis("off")
plt.show()
wordcloud.to_file('c-cool.jpg')
# 生成詞雲html (generate the word-cloud HTML)