# coding=utf-8
"""Download the images of every article linked from a paginated listing.

For each listing page the script collects the article links found under
``//div[@class="art"]/ul/li``, opens each article, and saves the first image
of every ``<p>`` inside ``//div[@class="artbody imgbody"]`` into ``./imgs/``,
named after the last path component of the image URL.

A failed request or an element without the expected link/image is reported
on stderr and skipped, so a single bad page does not abort the whole crawl.
"""
import os
import posixpath
import sys

try:
    from urllib.parse import urljoin, urlsplit
except ImportError:  # Python 2
    from urlparse import urljoin, urlsplit

import requests
from lxml import etree

if sys.version_info[0] == 2:
    # Legacy Python 2 workaround kept from the original script: makes the
    # implicit str/unicode conversions (e.g. when printing page text) use
    # UTF-8. Python 3 neither needs nor supports it.
    reload(sys)  # noqa: F821 - builtin on Python 2 only
    sys.setdefaultencoding('utf-8')

url_start = 'http://www.xxxxxx.com/yyy/'
# NOTE(review): this host ("xxxxx") differs from the one in url_start
# ("xxxxxx") -- looks like a placeholder typo; confirm before running.
url_base = 'http://www.xxxxx.com'

FIRST_PAGE = 1
LAST_PAGE = 207
OUTPUT_DIR = "./imgs/"
# Seconds to wait for a server before giving up; without a timeout a single
# stalled connection would hang the crawl indefinitely.
REQUEST_TIMEOUT = 30

# Sent with image requests only; built once instead of once per image.
IMAGE_HEADERS = {
    "Accept": "text/html,application/xhtml+xml,application/xml;",
    "Accept-Encoding": "gzip",
    "Accept-Language": "zh-CN,zh;q=0.8",
    "Referer": "http://www.example.com/",
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.90 Safari/537.36",
}


def _warn(message):
    """Write a one-line diagnostic to stderr."""
    sys.stderr.write(message + '\n')


def listing_url(page):
    """Return the URL of listing page *page* (1-based).

    The first page is ``7.html``; later pages are ``7-<page>.html``.
    """
    if page == 1:
        return url_start + '7.html'
    return url_start + '7-' + str(page) + '.html'


def image_filename(img_url):
    """Return the last path component of *img_url*, without query/fragment.

    Returns an empty string when the URL path ends in ``/`` or is empty.
    """
    return posixpath.basename(urlsplit(img_url).path)


def _fetch(session, url, headers=None):
    """GET *url* and return the response, or ``None`` if the request failed.

    Connection errors, timeouts and HTTP error statuses are reported on
    stderr rather than raised.
    """
    try:
        response = session.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
    except requests.RequestException as exc:
        _warn('skipping %s: %s' % (url, exc))
        return None
    return response


def _parse(html, source_url):
    """Parse *html* with lxml's HTML parser; return ``None`` if unusable."""
    try:
        tree = etree.HTML(html)
    except (etree.LxmlError, ValueError) as exc:
        _warn('cannot parse %s: %s' % (source_url, exc))
        return None
    if tree is None:
        _warn('empty document: %s' % source_url)
    return tree


def save_image(session, img_url):
    """Download *img_url* into OUTPUT_DIR under its URL file name."""
    img_name = image_filename(img_url)
    if not img_name:
        _warn('no file name in image URL: %s' % img_url)
        return
    response = _fetch(session, img_url, headers=IMAGE_HEADERS)
    if response is None:
        return
    with open(os.path.join(OUTPUT_DIR, img_name), "wb") as out:
        out.write(response.content)


def download_article_images(session, article_url):
    """Save the first image of every paragraph in the article's image body."""
    response = _fetch(session, article_url)
    if response is None:
        return
    tree = _parse(response.text, article_url)
    if tree is None:
        return
    for paragraph in tree.xpath('//div[@class="artbody imgbody"]/p'):
        sources = paragraph.xpath('img/@src')
        if not sources:
            # Text-only paragraph: nothing to download.
            continue
        # Resolve against the article URL so relative src values work too.
        save_image(session, urljoin(article_url, sources[0]))


def main():
    """Walk listing pages FIRST_PAGE..LAST_PAGE and download their images."""
    if not os.path.isdir(OUTPUT_DIR):
        os.makedirs(OUTPUT_DIR)

    # One session for the whole crawl so connections are reused.
    with requests.Session() as session:
        for page in range(FIRST_PAGE, LAST_PAGE + 1):
            url = listing_url(page)
            print(url)
            response = _fetch(session, url)
            if response is None:
                continue
            url_lists = response.text
            print(url_lists)
            tree = _parse(url_lists, url)
            if tree is None:
                continue
            content_lists = tree.xpath('//div[@class="art"]/ul/li')
            print(content_lists)
            for item in content_lists:
                hrefs = item.xpath('a/@href')
                if not hrefs:
                    # List item without a direct link: skip it.
                    continue
                download_article_images(session, urljoin(url_base, hrefs[0]))


if __name__ == '__main__':
    main()