# coding:utf-8
"""Download every .jpg picture embedded in a Baidu Tieba post (regex version)."""
import re
import urllib.request  # Python 3: urllib.urlopen / urlretrieve moved here


def get_content(url):
    """Fetch *url* and return the page body decoded as text.

    The context manager guarantees the connection is closed even if
    reading raises.
    """
    with urllib.request.urlopen(url) as resp:
        # Decode permissively: Tieba pages are UTF-8 but may contain stray bytes.
        return resp.read().decode("utf-8", errors="replace")


def get_images(info):
    """Find all <img class="BDE_Image" src="...jpg"> URLs in *info* and
    download each as 0.jpg, 1.jpg, ...

    Prints every matched URL, then the total count.  Returns None.
    """
    # The original pattern demanded a literal space after the closing quote
    # (r'... src="(.+?\.jpg)" '), which misses tags written src="...jpg">.
    # Anchor on the attribute pair only.
    pattern = re.compile(r'class="BDE_Image" src="(.+?\.jpg)"')
    image_urls = pattern.findall(info)
    # enumerate replaces the hand-rolled i = i + 1 counter.
    for i, image_url in enumerate(image_urls):
        print(image_url)
        urllib.request.urlretrieve(image_url, '%s.jpg' % i)
    print(len(image_urls))


if __name__ == "__main__":
    # Guarded so importing this module no longer triggers network I/O;
    # also stop printing get_images(...)'s return value, which is always None.
    info = get_content("http://tieba.baidu.com/p/2299704181")
    get_images(info)
# Install the dependency for the script below: sudo pip install beautifulsoup4
# coding:utf-8
"""Download every picture in a Baidu Tieba post (BeautifulSoup version).

Requires the third-party package: pip install beautifulsoup4
"""
import urllib.request  # Python 3: urllib.urlopen / urlretrieve moved here

from bs4 import BeautifulSoup


def get_content(url):
    """Fetch *url* and return the page body decoded as text.

    The context manager guarantees the connection is closed even if
    reading raises.
    """
    with urllib.request.urlopen(url) as resp:
        # Decode permissively: Tieba pages are UTF-8 but may contain stray bytes.
        return resp.read().decode("utf-8", errors="replace")


def get_images(info):
    """Use BeautifulSoup to locate every <img class="BDE_Image"> tag in
    *info* and download its src as 1.jpg, 2.jpg, ...

    Prints every image URL, then a summary count.  Returns None.
    """
    # Name the parser explicitly: BeautifulSoup(info) with no parser emits
    # a warning and may pick different parsers on different machines.
    soup = BeautifulSoup(info, "html.parser")
    all_img = soup.find_all('img', class_="BDE_Image")
    # enumerate(..., start=1) replaces the hand-rolled i = i + 1 counter
    # while keeping the original 1-based file names.
    for i, img in enumerate(all_img, start=1):
        print(img['src'])
        urllib.request.urlretrieve(img['src'], '%s.jpg' % i)
    print("一共下載了 ", len(all_img), "張圖片")


if __name__ == "__main__":
    # Guarded so importing this module no longer triggers network I/O;
    # also stop printing get_images(...)'s return value, which is always None.
    info = get_content("http://tieba.baidu.com/p/3368845086")
    get_images(info)