在閱讀《爬蟲-漫畫喵的100行逆襲》的代碼後,觀察龍族三漫畫圖片地址的規律,簡單寫了一個專門下載龍族三漫畫的小爬蟲。
# coding=utf-8
"""Small crawler that downloads the pages of the Longzu III comic.

Image URLs follow a fixed pattern parameterised by a zero-padded chapter
number and a page index, so pages are requested one after another until the
server answers with an HTTP error, which is taken as the end of the chapter.
"""
import contextlib
import os

try:
    import urllib2  # Python 2
except ImportError:
    # Python 3 exposes the same urlopen / HTTPError names in urllib.request.
    import urllib.request as urllib2

# Storage directory. Built with os.path.join instead of the literal
# ".\longzu", which only worked as a path on Windows and relied on "\l"
# not being an escape sequence.
save_folder = os.path.join(os.curdir, "longzu")

# Image link template: {0} is the zero-padded chapter number, {1} the page
# index within that chapter.
url = 'http://mhpic.zymk.cn/comic/L%2F%E9%BE%99%E6%97%8F%E2%85%A2%2F{0}%E8%AF%9D%2F{1}.jpg-mht.middle'

# Inclusive range of chapters to download.
FIRST_CHAPTER = 1
LAST_CHAPTER = 46


def _fetch(url):
    """Return the raw body of *url*, always closing the connection."""
    # contextlib.closing is used because Python 2 responses are not
    # context managers themselves.
    with contextlib.closing(urllib2.urlopen(url)) as response:
        return response.read()


def _write_file(save_path, data):
    """Write *data* (bytes) to *save_path*, replacing any existing file."""
    with open(save_path, 'wb') as fp:
        fp.write(data)


def _ensure_folder(path):
    """Create directory *path* (and missing parents) if it does not exist."""
    if not os.path.isdir(path):
        os.makedirs(path)


def download(url, save_path):
    """Best-effort download of *url* into the file *save_path*.

    Any failure is printed rather than raised, so one bad page does not
    abort a long run. The data is fetched before the target file is
    opened, so a failed request does not leave an empty file behind.

    Args:
        url: Address of the resource to fetch.
        save_path: Path of the file to create or overwrite.
    """
    try:
        _write_file(save_path, _fetch(url))
    except Exception as et:  # deliberate best-effort: report and carry on
        print(et)


def download_chapter(chapter):
    """Download every page of *chapter* into its own sub-folder.

    Pages are requested with increasing index until the server responds
    with an HTTP error, which marks the end of the chapter. Each page is
    fetched exactly once; the bytes from the probe request are the ones
    written to disk.

    Args:
        chapter: Chapter number (an integer, formatted as two digits in
            the URL).
    """
    folder = os.path.join(save_folder, u"第 %d 話" % chapter)
    _ensure_folder(folder)

    index = 1
    while True:
        image_url = url.format('%.2d' % chapter, index)
        save_image_name = os.path.join(folder, "%.2d" % index + "." + "jpg")
        try:
            data = _fetch(image_url)
        except urllib2.HTTPError:
            # No such page: the previous one was the last of the chapter.
            break
        print("downloading:%s from url:%s" % (save_image_name, image_url))
        try:
            _write_file(save_image_name, data)
        except (IOError, OSError) as et:
            # Keep going with the next page, as download() would.
            print(et)
        index += 1


def main():
    """Create the storage folder and download all chapters in order."""
    _ensure_folder(save_folder)
    for chapter in range(FIRST_CHAPTER, LAST_CHAPTER + 1):
        download_chapter(chapter)


if __name__ == "__main__":
    main()