路過的關注一下哈
要求:
代碼:
import os
from urllib.parse import urljoin

import requests
from lxml import etree

# Seconds to wait on any single HTTP request before giving up; without a
# timeout, requests blocks indefinitely on an unresponsive server.
REQUEST_TIMEOUT = 10


def save(img_url, desc):
    """Download one image and store it next to its description text.

    The image is written to ``images/<name>`` and the description to
    ``text/<stem>.txt``, where ``<name>`` is the last ``/``-separated
    segment of *img_url* and ``<stem>`` is that name without its
    extension. Both directories are created on demand.

    Args:
        img_url: Absolute URL of the image to download.
        desc: Description text saved alongside the image.

    Raises:
        requests.RequestException: If the download fails, times out, or
            the server answers with an error status.
    """
    response = requests.get(img_url, timeout=REQUEST_TIMEOUT)
    # Fail loudly instead of saving an HTML error page as an "image".
    response.raise_for_status()

    os.makedirs('images', exist_ok=True)
    os.makedirs('text', exist_ok=True)

    # The last path segment of the URL is used as the local file name.
    image_file_name = img_url.split('/')[-1]
    # Swap only the extension: a plain str.replace('jpg', 'txt') would also
    # rewrite "jpg" inside the stem and would leave e.g. ".png" untouched.
    file_name = os.path.splitext(image_file_name)[0] + '.txt'

    with open(os.path.join('images', image_file_name), 'wb') as f:
        f.write(response.content)
    # Explicit UTF-8 so non-ASCII descriptions do not depend on the
    # platform's default encoding.
    with open(os.path.join('text', file_name), 'w', encoding='utf-8') as f:
        f.write(desc)

    print('保存成功', image_file_name)
    print('保存成功', file_name)


def main():
    """Scrape the teacher page and save each teacher's photo and profile.

    Every ``<div class="teacher_content">`` on the page is visited; the
    text directly inside the div is used as the description for each
    image directly inside that same div.

    Raises:
        requests.RequestException: If the page or any image cannot be
            fetched.
    """
    url = 'http://www.atguigu.com/teacher.shtml'
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)"
                      " Chrome/65.0.3325.146 Safari/537.36"}
    response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    html_obj = etree.HTML(response.content)

    # Walk the divs themselves rather than indexing a global image list:
    # this keeps every image paired with the text of the div that actually
    # contains it, even when some div has no image or several.
    for teacher_div in html_obj.xpath('//div[@class="teacher_content"]'):
        # Teacher profile text.
        desc = "".join(teacher_div.xpath('./text()')).lstrip()
        for src in teacher_div.xpath('./img/@src'):
            # Resolve the image path against the page URL; handles
            # relative, root-relative and absolute src values alike.
            img_url = urljoin(url, src)
            print(img_url)
            save(img_url, desc)


if __name__ == '__main__':
    main()
運行效果: