載入爬蟲模塊
from requests_html import HTMLSession # load the scraping module
建立 session 對象
# Load the scraping module.
from requests_html import HTMLSession

# Create the session object used to issue requests.
session = HTMLSession()
發現百度圖片搜索的 URL 規律,發起請求並匹配出圖片的 url
http://image.baidu.com/search/index?tn=baiduimage&fm=result&ie=utf-8&word=`咱們搜圖片的關鍵字`
# Load the scraping module.
from requests_html import HTMLSession

# Create the session object used to issue requests.
session = HTMLSession()

# Fetch the Baidu image search result page, using "二傻子" as the example keyword.
response = session.get('http://image.baidu.com/search/index?tn=baiduimage&fm=result&ie=utf-8&word=二傻子')

# Search template for the thumbnail URLs embedded in the page source;
# the "{}" placeholder marks the part that is captured.
img_url_regex = '"thumbURL":"{}",'

# Scan the page and collect every match of the template.
img_url_list = response.html.search_all(img_url_regex)
訪問圖片 url 並保存到本地
# Load the scraping module.
from requests_html import HTMLSession

# Create the session object used to issue requests.
session = HTMLSession()

# Fetch the Baidu image search result page, using "二傻子" as the example keyword.
response = session.get('http://image.baidu.com/search/index?tn=baiduimage&fm=result&ie=utf-8&word=二傻子')

# Search template for the thumbnail URLs embedded in the page source;
# the "{}" placeholder marks the part that is captured.
img_url_regex = '"thumbURL":"{}",'

# Scan the page and collect every match of the template.
img_url_list = response.html.search_all(img_url_regex)

# Number the files from 1 in the order the matches were found.
for index, match in enumerate(img_url_list, start=1):
    # Request the image link; the URL is the first captured field of the match.
    # A separate name is used so the search-page response is not overwritten.
    img_response = session.get(match[0])
    # Write the raw bytes of the image to a local file.
    with open(f'第{index}張.jpg', 'wb') as fw:
        fw.write(img_response.content)
類的封裝
from urllib.parse import quote

from requests_html import HTMLSession


class BaiDuImg:
    """Search Baidu Images for a keyword and save the thumbnails locally.

    Typical use is ``BaiDuImg().run()``, which reads the keyword from standard
    input, fetches the search page, extracts the thumbnail URLs and writes each
    image into the current working directory.
    """

    # HTTP session used for both the search page and the image downloads.
    session = HTMLSession()
    # Search template for thumbnail URLs in the page source; "{}" is the captured part.
    img_url_regex = '"thumbURL":"{}",'
    # Search-page URL; filled in by get_search().
    url = ''
    # Matches found by get_img_url_list(); always rebound, never mutated in place.
    img_url_list = []

    def get_search(self):
        """Read the search keyword from standard input and build the search URL."""
        search = input()
        # Percent-encode the keyword so characters such as "&" or "#" cannot
        # cut the query string short.
        keyword = quote(search, safe='')
        self.url = f'http://image.baidu.com/search/index?tn=baiduimage&fm=result&ie=utf-8&word={keyword}'

    def get_img_url_list(self):
        """Fetch the search page and store every thumbnail match in ``img_url_list``."""
        response = self.session.get(self.url)
        # The template is a class attribute, so it must be reached through
        # ``self``; a bare ``img_url_regex`` raises NameError inside a method.
        self.img_url_list = response.html.search_all(self.img_url_regex)

    def save_img(self):
        """Download each matched image and save it as ``第<n>張.jpg`` (n starts at 1)."""
        for index, match in enumerate(self.img_url_list, start=1):
            # Request the image link; the URL is the first captured field of the match.
            response = self.session.get(match[0])
            # Write the raw bytes of the image to a local file.
            with open(f'第{index}張.jpg', 'wb') as fw:
                fw.write(response.content)

    def run(self):
        """Run the whole pipeline: ask for a keyword, collect URLs, save images."""
        self.get_search()
        self.get_img_url_list()
        self.save_img()


if __name__ == '__main__':
    baidu = BaiDuImg()
    baidu.run()