因為需要每天查找漏洞信息,判斷架構和應用服務有沒有新漏洞被發現,如有則需修復升級,因此寫了一個腳本:到漏洞庫平臺按關鍵字爬取數據並生成日誌文件。共用到三個平臺,其中美國國家信息安全漏洞庫(NVD)的地址會時不時出現超時;若出現超時,可多重試兩次。三個平臺檢索出的漏洞差不多,代碼寫得比較粗糙,僅供參考。
python版本3.7 pip安裝requests便可 #coding=utf-8 import requests as r import re import time import datetime #爬取國家信息安全漏洞平臺 class gjxxaqpt: def get_404(self,url,keyword): #定義提交數據 qcvCname 檢索的詞,pageno 頁數 通常是抓取第一頁 data = {"qcvCname":keyword,"pageno":1} #post數據 result = r.post(url,data=data).text #正則匹配信息 filter_result = re.findall('<li style=".*?class="a_title2" >\r\n \t\t (.*?)</a>.*?<p><a href="(.*?)" target="_blank">(.*?)</a>.*?<img title="(.*?)" src=".*?<br/ >(.*?)\r\n\t\t\t\t\t\t </div>.*?</li>',result,re.S) return filter_result def get_404_mes(self,url): header = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36'} try: result = r.get(url,headers=header).text filter_result = re.findall('<div class="d_ldjj">.*?</p><p style="text-indent:2em">\n(.*?)\r\n\t\t\t</p>',result,re.S) if filter_result is not None: return filter_result[0] else: return "" except: print("鏈接超時"+url) def write_file(self,keyword,date_time): #定義post的url url = "http://www.cnnvd.org.cn/web/vulnerability/queryLds.tag" #定義後面組合信息須要的域名 url_domain = "http://www.cnnvd.org.cn" #存放最後結果數據的數組 mes_list = [] #循環檢索關鍵詞 for keyword in keylist: try: #調用方法獲取檢索的結果 get_404_re = self.get_404(url,keyword) #循環結果,拼接成字符串,寫入log文件 for res in get_404_re: #判斷包含本年日期的漏洞 if date_time in res[4]: mes_url = url_domain + res[1] try: message = self.get_404_mes(mes_url) mes = res[0] + " | " "漏洞編號:" + res[2] + " | " + "等級:" + res[3] + " | " + "時間:" + res[4] + " | " + "詳情地址:" + mes_url + " | " + "漏洞簡介:" + message mes_list.append(mes) except: print("timeout: "+mes_url) except: print("timeout:"+url+","+"keyword") return mes_list #cve中文漏洞信息庫 - scap中文社區 class cve_scap: #獲取全部漏洞集合 def get_cve_404(self,url,keyword): headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36'} #定義提交數據 qcvCname 檢索的詞,pageno 頁數 通常是抓取第一頁 data = {"search_type":"t_keyword","keyword":keyword} #post數據 result = 
r.get(url,params=data).text filter_result = re.findall("<td class='hidden-xs'>.*?<a href=(.*?)>\n (.*?)\n </a>.*?<td class='hidden-xs hidden-sm'>(.*?)</td>.*?title='(.*?)' class='grade",result,re.S) return filter_result #對單個漏洞信息獲取 def get_cve_404_mes(self,url): header = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36'} try: result = r.get(url,headers=header).text filter_result = re.findall("pad30T pad30B mrg0B' style='word-wrap: break-word;'>\n (.*?)</p>",result,re.S) if filter_result is not None: return filter_result[0] else: return "" except: print("timeout: " + url) #信息獲取 def write_file(self,keylist,date_time): #定義post的url url = "http://cve.scap.org.cn/vulns/1" #定義數組,存放信息 mes_list = [] for keyword in keylist: #爬取網站 html_filter = self.get_cve_404(url,keyword) #定義後面組合信息須要的域名 url_domain = "http://cve.scap.org.cn" for res in html_filter: if date_time in res[2]: try: mes_url = url_domain + res[0].strip('"') message = self.get_cve_404_mes(mes_url) mes = "漏洞編號:" + res[1] + " | " + "等級:" + res[3] + " | " + "時間:" + res[2] + " | " + "詳情地址:" + mes_url + " | " + "漏洞簡介:" + message.replace("\n","") mes_list.append(mes) except: print("timeout: "+mes_url) return mes_list #美國國家信息安全漏洞庫 class nvd_nist: #獲取全部漏洞集合 def get_nvd_404(self,url,keyword): headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36'} #定義提交數據 qcvCname 檢索的詞,pageno 頁數 通常是抓取第一頁 data = {"form_type":"Basic","results_type":"overview","query":keyword,"search_type":"all"} #關閉ssl告警提示 r.packages.urllib3.disable_warnings() #post數據 result = r.get(url,params=data,verify=False).text filter_result = re.findall("<tr data-testid=\"vuln-row.*?<a href=\"(.*?)\" id=\".*?data-testid=\"vuln-detail-link-[0-9]{1,2}\">(.*?)</a></strong><br/>.*?<p data-testid='vuln-summary-[0-9]{1,2}'>(.*?)</p>.*?<span data-testid='vuln-published-on-[0-9]{1,2}'>(.*?)</span>",result,re.S) 
return filter_result def write_file(self,keylist,date_time): #查找漏洞的url地址 url = "https://nvd.nist.gov/vuln/search/results" #拼接url的地址 url_dom = "https://nvd.nist.gov" #存儲最後結果的數組 mes_list = [] #循環獲取關鍵字的漏洞信息 for keyword in keylist: try: filter_html = self.get_nvd_404(url,"nginx") for res in filter_html: url_domain = url_dom + res[0] #對英文的時間格式進行轉換 eng_time = res[3] if "AM" in eng_time: up_time = eng_time.split("AM")[0] elif "PM" in eng_time: up_time = eng_time.split("PM")[0] else: print("時間判斷有誤") #我獲取到的時間是英文的時間格式,須要轉換爲數字時間格式,這裏時間字符串和裏面的時間格式要保持一次,差一個空格,也不行 time_format=str(datetime.datetime.strptime(up_time,'%B %d, %Y; %H:%M:%S ')) if date_time in time_format: mes = "漏洞編號:" + res[1] + " | " + "時間:" + time_format + " | " + "詳情地址:" + url_domain + " | " + "漏洞簡介:" + res[2] mes_list.append(mes) except: print("timeout:" + url + "," + keyword) return mes_list if __name__ == "__main__": #須要查找的關鍵字數組 keylist=['nginx','openssl','openssh'] #獲取本年的日期 date_time = time.strftime("%Y",time.localtime()) #打開寫入log文件 files = open("404_message.log","w+",encoding='utf-8') #獲取國家信息漏洞庫 guojia = gjxxaqpt() files.write("#國家信息漏洞庫:\n") for i in guojia.write_file(keylist,date_time): files.write(i+"\n") files.write("\n") #cve中文漏洞信息庫 - scap中文社區 查找 cve = cve_scap() files.write("#cve中文漏洞信息庫:\n") for i in cve.write_file(keylist,date_time): files.write(i+"\n") files.write("\n") #美國國家信息安全漏洞庫 查找 nvd = nvd_nist() files.write("#美國國家信息安全漏洞庫:\n") for i in nvd.write_file(keylist,date_time): files.write(i+"\n") files.write("\n") files.close()