1.網站鏈接:https://www.cnvd.org.cn/shareData/list
2.需要下載的頁面文件:
python
3.該頁面需要登錄之後才能批量下載共享漏洞文件,我們通過在請求頭中攜帶登錄後的 cookie 來實現。
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Date: 2019-08-17
Author: Bob
Description: crawl the shared XML vulnerability files from CNVD with Python.
"""
import os

import requests
from bs4 import BeautifulSoup


def cnvd_spider():
    """Download every XML file linked from the CNVD shared-data listing page.

    The listing page is requested with a hard-coded login cookie, every
    anchor whose ``title`` attribute equals '下載xml' is collected, and the
    file behind each anchor is saved into the current working directory
    under the anchor's text.

    Raises:
        requests.HTTPError: if the listing page or a download answers with
            an HTTP error status.
        requests.RequestException: on connection problems or timeouts.
    """
    base_url = 'https://www.cnvd.org.cn'
    list_url = 'https://www.cnvd.org.cn/shareData/list?max=240&offset=0'
    # NOTE(review): the cookie is a captured login session and is presumably
    # short-lived; replace it with a fresh one from a logged-in browser and
    # avoid committing real session cookies to version control.
    headers = {
        "Cookie": "__jsluid_s=65d5e7902f04498e89b16e93fb010b3c; __jsluid_h=1ab428e655aee36ac3c9835db29b6714; JSESSIONID=91BB91B37543D365AA64895EDFCD828F; __jsl_clearance=1566003116.655|0|CYPFsKirGYBG12qtoOrS5Kq1rM0%3D",
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36",
    }
    # Without a timeout, requests waits forever on a stalled connection.
    timeout = 30

    listing = requests.get(url=list_url, headers=headers, timeout=timeout)
    # Fail loudly instead of parsing an error/challenge page as the listing.
    listing.raise_for_status()

    soup = BeautifulSoup(listing.text, 'lxml')
    # NOTE(review): the title is matched exactly; if the live page uses the
    # simplified-Chinese form of this text, no link will match -- confirm
    # against the page source.
    links = soup.find_all('a', attrs={'title': '下載xml'})

    for link in links:
        href = link.get('href')
        if not href:
            # An anchor without href has nothing to download.
            continue

        # Keep only the final path component so the link text can never
        # direct the write outside the current directory.
        file_name = os.path.basename(link.get_text().strip())
        if not file_name:
            continue

        response = requests.get(url=base_url + href, headers=headers,
                                timeout=timeout)
        response.raise_for_status()

        # response.content is bytes, so the file must be opened in binary
        # mode; text mode ('w') raises TypeError on Python 3.
        with open(file_name, 'wb') as f:
            f.write(response.content)


if __name__ == '__main__':
    cnvd_spider()