我分析王者榮耀網站上面的英雄資料庫,發現全部英雄的頁面基本上都是連貫的,而且還是偏向於靜態網頁,沒有過多的JavaScript渲染,因此比較適合來一波窮舉遍歷爬蟲,而且速度也很快。利用Python的字符串操作、列表操作、字典操作等等循環遍歷英雄頁面,再利用列表切片索引和字符串拼接等方法來解析HTML,獲得皮膚圖片鏈接和背景故事文本,將皮膚圖片的二進制內容保存爲png高清大圖,將全部英雄背景故事合併保存爲文本文檔。
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
"""Download hero skin images from game.gtimg.cn and record their URLs.

Hero ids are probed by brute force over two numeric ranges.  For every id
whose first skin image answers HTTP 200, skins 1-8 are requested and each
one that exists is written to ``./wzry-jpg/<hero>-<skin>.png``.  The URLs
of all saved images are finally written, one per line, to
``./wzry-jpg/wzry-pf.txt``.
"""
import itertools
import os

import requests

# Directory that receives both the images and the URL list.
path = './wzry-jpg/'
# Skin image URL; {0} is the hero id, {1} is the 1-based skin number.
SKIN_URL = ("http://game.gtimg.cn/images/yxzj/img201606/skin/hero-info/"
            "{0}/{0}-bigskin-{1}.jpg")
# The two id ranges that are scanned for heroes.
HERO_ID_RANGES = (range(105, 200), range(501, 516))
# Skin numbers tried for every hero.
SKIN_NUMBERS = range(1, 9)
# Seconds to wait for the server so one stalled request cannot hang the run.
REQUEST_TIMEOUT = 10


def _fetch(url):
    """Return the response for *url*, or None unless it answers HTTP 200.

    Network failures are reported and treated like a missing resource so
    that a single bad request does not abort the whole crawl.
    """
    try:
        response = requests.get(url, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as exc:
        print("請求失敗,已跳過: {} ({})".format(url, exc))
        return None
    if response.status_code != 200:
        return None
    return response


def _download_hero_skins(hero_id):
    """Save every available skin of *hero_id*; return the URLs saved."""
    saved = []
    for skin_no in SKIN_NUMBERS:
        imgurl = SKIN_URL.format(hero_id, skin_no)
        response = _fetch(imgurl)
        if response is None:
            if skin_no == 1:
                # Skin 1 doubles as the existence probe for the hero id:
                # without it the remaining skins are not requested at all.
                break
            continue
        saved.append(imgurl)
        print("開始下載第{}-{}個英雄皮膚圖片>>>".format(hero_id, skin_no), end='')
        # NOTE(review): the source URL ends in .jpg, so the payload is
        # presumably JPEG even though it is stored under a .png name; the
        # file name is kept unchanged for compatibility.
        with open(path + str(hero_id) + '-' + str(skin_no) + '.png', 'wb') as f:
            f.write(response.content)
        print('======下載完成======')
    return saved


def main():
    """Crawl all hero ids, save their skins, then write the URL list."""
    # The original script assumed the directory already existed.
    os.makedirs(path, exist_ok=True)
    ls = []
    for hero_id in itertools.chain(*HERO_ID_RANGES):
        ls.extend(_download_hero_skins(hero_id))
    # The context manager closes the file even if a write fails.
    with open(path + 'wzry-pf.txt', 'w', encoding='utf-8') as fo:
        fo.writelines(line + '\n' for line in ls)


if __name__ == "__main__":
    main()
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
"""Scrape hero background stories from pvp.qq.com into one text file.

Hero ids are probed by brute force over two numeric ranges.  For every
detail page that answers HTTP 200, the hero name and background story are
cut out of the HTML with plain string splitting and appended to the output
file, each entry followed by a line of 30 dashes.
"""
import itertools

import requests

# Destination of the merged stories (absolute path kept from the original).
OUTPUT_FILE = 'R:/python123全國等考/wzry-jpg/pop-bd.txt'
# Hero detail page; {} is the hero id.
DETAIL_URL = "https://pvp.qq.com/web201605/herodetail/{}.shtml"
# The two id ranges that are scanned for heroes.
HERO_ID_RANGES = (range(105, 200), range(501, 516))
# Seconds to wait for the server so one stalled request cannot hang the run.
REQUEST_TIMEOUT = 10


def _fetch_page(hero_id):
    """Return the detail page text of *hero_id*, or None if unavailable.

    Network failures are reported and treated like a missing page so that a
    single bad request does not abort the whole crawl.
    """
    url = DETAIL_URL.format(hero_id)
    try:
        r = requests.get(url, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as exc:
        print("請求失敗,已跳過: {} ({})".format(url, exc))
        return None
    if r.status_code != 200:
        return None
    # Decode the body as GBK, exactly as the original script did.
    r.encoding = 'gbk'
    return r.text


def _parse_hero(html):
    """Return ``(name, story)`` cut out of a hero detail page.

    The name is the first single-quoted value after the first ``cname``
    occurrence (up to the next comma).  The story is the text between the
    first and second ``p>`` after the first ``pop-bd`` occurrence, minus
    its last two characters (presumably the ``</`` of the closing tag).

    Raises:
        IndexError: if the page does not contain the expected markers.
    """
    name = html.split('cname')[1].split(',')[0].split("'")[1]
    text = html.split('pop-bd')[1].split('p>')[1][:-2]
    return name, text


def main():
    """Crawl all hero ids and write their stories to ``OUTPUT_FILE``."""
    # Explicit UTF-8 keeps the Chinese text writable regardless of locale;
    # the context manager closes the file even if the crawl fails midway.
    with open(OUTPUT_FILE, 'w', encoding='utf-8') as fo:
        for hero_id in itertools.chain(*HERO_ID_RANGES):
            html = _fetch_page(hero_id)
            if html is None:
                continue
            try:
                name, text = _parse_hero(html)
            except IndexError:
                # A page with an unexpected layout should not kill the run.
                print("第{}個英雄頁面無法解析,已跳過".format(hero_id))
                continue
            print("開始保存第{}個英雄背景故事>>>".format(hero_id))
            # The trailing newline keeps the next hero's header from being
            # glued onto the end of the dashed separator line.
            fo.write('【' + name + '】' + '\n' + text + '\n' + '-' * 30 + '\n')


if __name__ == "__main__":
    main()