"""Download the .jpg images referenced on a series of category pages.

Written to stay compatible with Python 3.4 (no f-strings).
"""
import os
import re
import urllib.request

# Index of the next image to save. It is module-level so numbering keeps
# counting up across successive getImg() calls (0.jpg, 1.jpg, ...).
x = 0

# urlopen() takes its timeout in seconds; the previous value of 10000 was
# almost three hours, i.e. effectively no timeout at all.
PAGE_TIMEOUT = 10

# Captures the src of every <img> whose alt attribute directly precedes it
# and whose URL ends in ".jpg".
_IMG_RE = re.compile(r'alt=".+?" src="(.+?\.jpg)"')

# Matches the ",<digits>,<digits>" part of an image URL.
# NOTE(review): presumably a width/height pair understood by the image
# server -- confirm against the site.
_SIZE_RE = re.compile(r',\d+,\d+')


def getHtml(url, timeout=PAGE_TIMEOUT):
    """Fetch *url* and return the raw response body as bytes.

    The response is closed before returning. *timeout* is in seconds.
    Errors raised by ``urllib.request.urlopen`` propagate to the caller.
    """
    with urllib.request.urlopen(url, None, timeout) as page:
        return page.read()


def getImg(html, dest_dir="img"):
    """Download every .jpg image referenced in *html* into *dest_dir*.

    *html* is the page body as bytes. Each matched image URL has its
    ",<digits>,<digits>" part rewritten to ",800,450" before download, and
    is saved as ``<dest_dir>/<x>.jpg`` where ``x`` is the module-level
    counter, which is incremented after each download.

    *dest_dir* is created (including parents) if it does not exist yet.

    Returns the number of images downloaded by this call.
    """
    global x

    # urlretrieve() does not create missing directories itself.
    os.makedirs(dest_dir, exist_ok=True)

    # Only ASCII attribute syntax is matched, so a stray undecodable byte
    # elsewhere on the page should not abort the whole run.
    text = html.decode('utf-8', errors='replace')

    saved = 0
    for imgurl in _IMG_RE.findall(text):
        sized_url = _SIZE_RE.sub(',800,450', imgurl)
        # "/" is used rather than os.path.join so the printed path is the
        # same on every platform; it is a valid separator on Windows too.
        target = "%s/%s.jpg" % (dest_dir, x)
        urllib.request.urlretrieve(sized_url, target)
        print("\n" + sized_url + "========" + target)
        x += 1
        saved += 1
    return saved


def main():
    """Fetch category pages 1 through 8 and download their images."""
    print('Starting...')
    for p in range(1, 9):
        html = getHtml('http://m.lovebizhi.com/category/7655/%d/' % p)
        print("\n-------------------------page:%d-------------------------------" % p)
        getImg(html)
    print("\nDone!")


if __name__ == "__main__":
    main()
以上代碼的運行環境為 Python 3.4.3。需要注意的是,運行時必須對腳本所在目錄有寫入權限;若 img 目錄未能自動建立,請在腳本的同目錄下手動建立 img 目錄。圖片的下載位置也可以自行修改。
非常感謝雪梨蘋果。