# Python image-scraping example (tested)
# -*- coding: utf-8 -*-  
import urllib  
import re  
import time  
import os  
import socket  
def schedule(a, b, c):
    """Print download progress; usable as a ``urlretrieve`` report hook.

    Args:
        a: Number of data blocks downloaded so far.
        b: Size of one data block.
        c: Total size of the remote file. ``urlretrieve`` passes -1 when
            the server does not report a size.
    """
    if c <= 0:
        # Total size unknown (or zero): a percentage would be negative or
        # divide by zero, so report the amount transferred instead.
        print('%d bytes' % (a * b))
        return
    # The last block usually overshoots the file size, so cap at 100%.
    per = min(100.0 * a * b / c, 100.0)
    print('%.2f%%' % per)
  
def getHtml(url):
    """Fetch *url* and return the response body.

    Args:
        url: Address of the page to download.

    Returns:
        Whatever ``read()`` on the opened response returns.
    """
    # The default timeout is process-wide, so it also applies to sockets
    # opened after this call returns.
    socket.setdefaulttimeout(5)
    page = urllib.urlopen(url)
    try:
        return page.read()
    finally:
        # Release the connection even when reading fails or times out.
        page.close()
  
def downloadImg(html, prei,
                basedir='/Users/chenbing/Documents/workspace/python/jpg'):
    """Download every ``.jpg`` referenced by a ``src="..."`` attribute in *html*.

    Images are saved as ``0.jpg``, ``1.jpg``, ... inside
    ``<basedir>/<prei><year>-<month>-<day>`` (current local date, not
    zero-padded). The folder is created when it does not exist.

    Args:
        html: Page source to scan for image URLs.
        prei: Prefix of the dated folder name.
        basedir: Local directory under which the dated folder is created.

    Returns:
        The result of the last ``urllib.urlretrieve`` call, or ``None``
        when *html* contains no matching image.
    """
    imglist = re.findall(r'src="(.+?\.jpg)"', html)

    # Name the folder after the prefix plus today's date.
    t = time.localtime()
    foldername = '%s%d-%d-%d' % (prei, t.tm_year, t.tm_mon, t.tm_mday)
    picpath = os.path.join(basedir, foldername)
    if not os.path.exists(picpath):  # create the folder on first use
        os.makedirs(picpath)

    # Stays None when there is nothing to download, so the return below
    # never hits an unbound local.
    image = None
    for x, imgurl in enumerate(imglist):
        # os.path.join keeps the file inside picpath on every platform; a
        # hard-coded backslash becomes part of the file name on POSIX.
        target = os.path.join(picpath, '%s.jpg' % x)
        print('Downloading image to location: ' + target + '\nurl=' + imgurl)
        image = urllib.urlretrieve(imgurl, target, schedule)
    return image
  
      
      
if __name__ == '__main__':
    # The literal opens with ''' followed by two quote characters, so the
    # banner printed is:  ''  start  .........
    print('''''  start  .........''')
    # Probe one thread URL per counter value appended to the "5039" prefix.
    for i in range(100000, 999999):
        strs = "http://tieba.baidu.com/p/5039" + str(i)
        try:
            html = getHtml(strs)
            downloadImg(html, str(i))
        except Exception as e:
            # Best-effort crawl: report the failing URL with its cause and
            # carry on with the next page.
            print(strs + " error " + str(e))

    print("Download has finished.")
# python

# Related articles
# Related tags / search