Python 抓取圖片例子實測

時間 2019-11-12

原文鏈接

# -*- coding: utf-8 -*-
import urllib
import re
import time
import os
import socket
# Show download progress.
def schedule(a, b, c):
    """Print download progress; suitable as a ``urlretrieve`` report hook.

    a: number of data blocks downloaded so far
    b: size of one data block
    c: total size of the remote file

    Prints the completed percentage, capped at 100, with two decimals.
    When ``c`` is not positive (size unknown or zero) a percentage cannot
    be computed, so the amount downloaded so far (``a * b``) is printed
    instead.
    """
    if c <= 0:
        # Guard against ZeroDivisionError and meaningless negative percentages.
        print('%d bytes' % (a * b))
        return
    # The last block usually overshoots the file size, hence the cap.
    per = min(100.0 * a * b / c, 100)
    print('%.2f%%' % per)

def getHtml(url):
    """Fetch ``url`` and return the raw response body.

    Sets the process-wide default socket timeout to 5 seconds before
    opening the URL; that setting stays in effect for sockets created
    afterwards. The response object is always closed, even when reading
    the body fails.
    """
    socket.setdefaulttimeout(5)
    page = urllib.urlopen(url)
    try:
        return page.read()
    finally:
        # Release the connection even if read() raises (e.g. on a timeout).
        page.close()

def downloadImg(html, prei, basedir='/Users/chenbing/Documents/workspace/python/jpg'):
    """Download every ``.jpg`` referenced by a ``src="..."`` attribute in ``html``.

    html:    page source to scan for image URLs
    prei:    prefix for the per-run folder name
    basedir: local directory under which the folder is created
             (defaults to the original hard-coded location)

    Images are saved as ``0.jpg``, ``1.jpg``, ... inside
    ``<basedir>/<prei><year>-<month>-<day>``, which is created if missing.
    Progress is reported through ``schedule``.

    Returns the result of the last ``urlretrieve`` call, or ``None`` when
    the page contains no matching image.
    """
    imglist = re.findall(r'src="(.+?\.jpg)"', html)

    # Folder name: prefix followed by today's date (month/day not zero-padded).
    t = time.localtime(time.time())
    foldername = '%s%d-%d-%d' % (prei, t.tm_year, t.tm_mon, t.tm_mday)
    picpath = os.path.join(basedir, foldername)  # local download directory

    if not os.path.exists(picpath):  # create the directory when it is missing
        os.makedirs(picpath)

    image = None  # stays None when no image URL was found
    for x, imgurl in enumerate(imglist):
        # os.path.join keeps the file inside picpath on every platform; a
        # hard-coded backslash would become part of the file name on POSIX.
        target = os.path.join(picpath, '%s.jpg' % x)
        print('Downloading image to location: ' + target + '\nurl=' + imgurl)
        image = urllib.urlretrieve(imgurl, target, schedule)
    return image



# Script entry point: walk a range of thread ids and download the images of
# each thread page.
if __name__ == '__main__':
    print(''''' start .........''')
    for i in range(100000, 999999):  # note: the upper bound is exclusive
        strs = "http://tieba.baidu.com/p/5039" + str(i)
        try:
            html = getHtml(strs)
            downloadImg(html, str(i))
        except Exception as e:
            # Best-effort crawl: report the failing URL with the reason and
            # move on to the next id. ``html`` may be unbound or stale here,
            # so the exception is what gets printed.
            print(strs + " error " + str(e))

    print("Download has finished.")
python

相關標籤/搜索

每日一句

每一个你不满意的现在，都有一个你没有努力的曾经。

Python 抓取圖片例子 實測

Python 抓取圖片例子實測