# -*- coding: utf-8 -*-
import urllib
import re
import time
import os
import socket
# Show download progress
def schedule(a, b, c):
    """Print download progress; used as the ``urlretrieve`` report hook.

    Args:
        a: Number of data blocks transferred so far.
        b: Size of one data block, in bytes.
        c: Total size of the remote file, in bytes. A value <= 0 means the
            size is not known (``urlretrieve`` passes -1 in that case).

    Prints the percentage completed with two decimals, capped at 100, or
    the number of bytes received when the total size is unknown.
    """
    if c <= 0:
        # Without a usable total, dividing would either raise
        # ZeroDivisionError (aborting the download) or print a negative
        # percentage, so report the raw byte count instead.
        print('%d bytes' % (a * b))
        return
    # The final block is usually only partly filled, so a * b can overshoot
    # the real file size; clamp the result to 100.
    per = min(100.0 * a * b / c, 100)
    print('%.2f%%' % per)
def getHtml(url):
    """Fetch *url* and return the response body as read from the server.

    Side effect: sets the process-wide default socket timeout to 5 seconds.
    The setting is global, so it also applies to sockets created after this
    call returns.

    Args:
        url: Address of the page to download.

    Returns:
        The full response body returned by ``page.read()``.
    """
    socket.setdefaulttimeout(5)
    page = urllib.urlopen(url)
    try:
        return page.read()
    finally:
        # Release the connection even if reading fails or times out.
        page.close()
def downloadImg(html, prei,
                basedir='/Users/chenbing/Documents/workspace/python/jpg'):
    """Download every ``.jpg`` referenced by a ``src="..."`` attribute in *html*.

    Images are saved as ``0.jpg``, ``1.jpg``, ... inside a folder named
    ``<prei><year>-<month>-<day>`` (local date, not zero-padded) under
    *basedir*. The folder is created if it does not exist yet.

    Args:
        html: Page source to scan for image URLs.
        prei: Prefix for the folder name.
        basedir: Local directory under which the dated folder is created.

    Returns:
        The value returned by ``urllib.urlretrieve`` for the last image, or
        ``None`` when the page contains no matching image.
    """
    imglist = re.findall(r'src="(.+?\.jpg)"', html)

    # Folder name: prefix followed by today's local date.
    t = time.localtime()
    foldername = '%s%d-%d-%d' % (prei, t.tm_year, t.tm_mon, t.tm_mday)
    picpath = os.path.join(basedir, foldername)  # local download directory
    if not os.path.exists(picpath):  # create the directory when missing
        os.makedirs(picpath)

    # Stays None when there is nothing to download, so the final return
    # never refers to an unbound name.
    image = None
    for x, imgurl in enumerate(imglist):
        # os.path.join uses the platform separator; a literal backslash
        # would become part of the file name on POSIX systems and leave the
        # file outside the dated folder.
        target = os.path.join(picpath, '%s.jpg' % x)
        print('Downloading image to location: ' + target + '\nurl=' + imgurl)
        image = urllib.urlretrieve(imgurl, target, schedule)
    return image
if __name__ == '__main__':
    # Script entry point: walk a range of thread ids, fetch each thread page
    # and download the .jpg images it references.
    print(''''' start .........''')
    # NOTE(review): range() excludes its upper bound, so id suffix 999999 is
    # never visited -- confirm whether that is intended.
    for i in range(100000, 999999):
        strs = "http://tieba.baidu.com/p/5039" + str(i)
        try:
            html = getHtml(strs)
            downloadImg(html, str(i))
        except Exception as e:
            # Best-effort crawl: a failing page must not stop the whole run,
            # but report which URL failed and why.
            print(strs + " error " + str(e))
    print("Download has finished.")