因爲 JS 腳本的影響,咱們請求獲得的數據經常與網頁顯示的數據不同。而 Chrome 插件 XPath Helper 不能調試本地網頁,因而有了製作一個 XPath 解析器的想法。(粗略嘗試了一下,沒有問題,你們要是發現 bug 的話記得評論告訴我啊~)
工具:Python、PyQt5、requests、lxml
有關(一)、(二)部分的教程可以參考:https://www.jb51.net/article/...
將如下代碼添加到 def setupUi 方法體的末尾:
# 設置按鈕控件 self.button_Get_html.clicked.connect(self.Button_Get_Html) self.button_Xpath_Parse.clicked.connect(self.Button_Xpath_Parse)
如下分別是按鈕「Get Html」和按鈕「Xpath Parse」的代碼:
def Button_Get_Html(self):
    """Fetch the page named in ``text_Web_Site`` and show its HTML.

    The decoded response body (or the error message, if the request fails)
    is written to ``text_HTML_Code``. An empty URL produces a hint instead
    of a request.
    """
    headers = {
        'User-Agent': (
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
            'AppleWebKit/537.36 (KHTML, like Gecko) '
            'Chrome/80.0.3970.5 Safari/537.36'
        )
    }
    url = self.text_Web_Site.toPlainText().strip()
    if not url:
        self.text_HTML_Code.setPlainText('網址不能爲空!')
        return

    # Prepend a scheme whenever the user omitted one (not only for
    # addresses that happen to start with "w"), and leave URLs that
    # already carry a scheme untouched.
    if '://' not in url:
        url = 'http://' + url

    try:
        # The context manager closes the session's connection pool; the
        # timeout keeps an unresponsive server from freezing the GUI forever.
        # NOTE: verify=False disables TLS certificate checks (kept from the
        # original behaviour of this debugging tool).
        with requests.Session() as session:
            response = session.get(
                url=url, headers=headers, verify=False, timeout=30
            )
            page_source = response.content.decode('utf-8', 'ignore')
    except Exception as e:
        # Slot boundary: show the failure to the user instead of crashing.
        self.text_HTML_Code.setPlainText(str(e))
    else:
        # Output the returned content in text_HTML_Code.
        self.text_HTML_Code.setPlainText(page_source)


def Button_Xpath_Parse(self):
    """Evaluate the XPath in ``text_Xpath_Syntax`` against ``text_HTML_Code``.

    Each match is appended to ``text_Result`` under a numbered separator
    line. If parsing or evaluation fails, the error message is shown in
    ``text_Result`` instead.
    """
    self.text_Result.document().clear()
    xpath_syntax = self.text_Xpath_Syntax.toPlainText()
    html_code = self.text_HTML_Code.toPlainText()

    try:
        # Parsing is inside the try block so that a parser error is
        # reported in the result pane rather than escaping the slot.
        html = etree.HTML(html_code)
        results = html.xpath(xpath_syntax)
    except Exception as e:
        self.text_Result.setPlainText(str(e))
        return

    # XPath functions such as count(), string() or boolean() return a
    # single scalar rather than a list of matches.
    if not isinstance(results, list):
        results = [results]

    for num, result in enumerate(results):
        self.text_Result.append(f"{'-' * 60}這裏是第 {num} 個")
        # A match is either an element (show its text) or a plain value
        # such as an attribute or text() string.
        if hasattr(result, 'text'):
            # Elements without leading text have text == None; show an
            # empty entry instead of failing the whole listing.
            entry = result.text or ''
        else:
            entry = str(result)
        self.text_Result.append(entry)
if __name__ == '__main__':
    # Every PyQt5 program must create an application object; sys.argv hands
    # it the list of command-line arguments.
    application = QtWidgets.QApplication(sys.argv)

    # QWidget is the base class of all PyQt5 user-interface objects. The
    # default constructor is used here, so the widget has no parent.
    window = QtWidgets.QWidget()
    helper_ui = Ui_Asyu17_Xpath_Helper()
    helper_ui.setupUi(window)
    window.show()

    # exec_() carries a trailing underscore because "exec" was a Python
    # keyword. Passing its return value to sys.exit() ensures the program
    # exits cleanly.
    exit_code = application.exec_()
    sys.exit(exit_code)
測試無問題後,可以使用 PyInstaller 將代碼打包成可執行文件~
完整代碼:
import sys

import requests
from lxml import etree
from PyQt5 import QtCore, QtGui, QtWidgets

# Requests are sent with verify=False, so silence the resulting
# InsecureRequestWarning messages.
requests.packages.urllib3.disable_warnings()


class Ui_Asyu17_Xpath_Helper(object):
    """UI definition and button handlers for the XPath helper window."""

    def setupUi(self, Asyu17_Xpath_Helper):
        """Create all child widgets on *Asyu17_Xpath_Helper* and wire the buttons."""
        Asyu17_Xpath_Helper.setObjectName("Asyu17_Xpath_Helper")
        Asyu17_Xpath_Helper.resize(969, 905)

        self.button_Xpath_Parse = QtWidgets.QPushButton(Asyu17_Xpath_Helper)
        self.button_Xpath_Parse.setGeometry(QtCore.QRect(830, 860, 75, 31))
        self.button_Xpath_Parse.setObjectName("button_Xpath_Parse")

        self.label = QtWidgets.QLabel(Asyu17_Xpath_Helper)
        self.label.setGeometry(QtCore.QRect(10, 10, 71, 16))
        self.label.setFrameShape(QtWidgets.QFrame.StyledPanel)
        self.label.setScaledContents(False)
        self.label.setObjectName("label")

        self.label_2 = QtWidgets.QLabel(Asyu17_Xpath_Helper)
        self.label_2.setGeometry(QtCore.QRect(490, 10, 51, 16))
        self.label_2.setFrameShape(QtWidgets.QFrame.StyledPanel)
        self.label_2.setScaledContents(False)
        self.label_2.setObjectName("label_2")

        self.label_3 = QtWidgets.QLabel(Asyu17_Xpath_Helper)
        self.label_3.setGeometry(QtCore.QRect(20, 860, 91, 31))
        self.label_3.setObjectName("label_3")

        self.text_Xpath_Syntax = QtWidgets.QTextBrowser(Asyu17_Xpath_Helper)
        self.text_Xpath_Syntax.setGeometry(QtCore.QRect(110, 860, 681, 31))
        font = QtGui.QFont()
        font.setFamily("Arial")
        font.setPointSize(13)
        self.text_Xpath_Syntax.setFont(font)
        self.text_Xpath_Syntax.setReadOnly(False)
        self.text_Xpath_Syntax.setObjectName("text_Xpath_Syntax")

        self.button_Get_html = QtWidgets.QPushButton(Asyu17_Xpath_Helper)
        self.button_Get_html.setGeometry(QtCore.QRect(830, 820, 75, 31))
        self.button_Get_html.setObjectName("button_Get_html")

        self.text_Web_Site = QtWidgets.QTextBrowser(Asyu17_Xpath_Helper)
        self.text_Web_Site.setGeometry(QtCore.QRect(110, 820, 681, 31))
        font = QtGui.QFont()
        font.setFamily("Arial")
        font.setPointSize(13)
        self.text_Web_Site.setFont(font)
        self.text_Web_Site.setReadOnly(False)
        self.text_Web_Site.setObjectName("text_Web_Site")

        self.label_4 = QtWidgets.QLabel(Asyu17_Xpath_Helper)
        self.label_4.setGeometry(QtCore.QRect(20, 820, 91, 31))
        self.label_4.setObjectName("label_4")

        # Container holding the HTML pane and the result pane side by side.
        self.layoutWidget = QtWidgets.QWidget(Asyu17_Xpath_Helper)
        self.layoutWidget.setGeometry(QtCore.QRect(10, 30, 951, 781))
        self.layoutWidget.setObjectName("layoutWidget")
        self.horizontalLayout = QtWidgets.QHBoxLayout(self.layoutWidget)
        self.horizontalLayout.setContentsMargins(0, 0, 0, 0)
        self.horizontalLayout.setObjectName("horizontalLayout")

        self.text_HTML_Code = QtWidgets.QTextBrowser(self.layoutWidget)
        self.text_HTML_Code.setEnabled(True)
        font = QtGui.QFont()
        font.setFamily("Arial")
        font.setPointSize(12)
        self.text_HTML_Code.setFont(font)
        self.text_HTML_Code.setMouseTracking(False)
        self.text_HTML_Code.setTabletTracking(False)
        self.text_HTML_Code.setReadOnly(False)
        self.text_HTML_Code.setObjectName("text_HTML_Code")
        self.horizontalLayout.addWidget(self.text_HTML_Code)

        self.text_Result = QtWidgets.QTextBrowser(self.layoutWidget)
        font = QtGui.QFont()
        font.setFamily("Arial")
        font.setPointSize(12)
        self.text_Result.setFont(font)
        self.text_Result.setReadOnly(False)
        # Named like every other widget in this form.
        self.text_Result.setObjectName("text_Result")
        self.horizontalLayout.addWidget(self.text_Result)

        self.retranslateUi(Asyu17_Xpath_Helper)
        QtCore.QMetaObject.connectSlotsByName(Asyu17_Xpath_Helper)

        # Set up the button controls.
        self.button_Get_html.clicked.connect(self.Button_Get_Html)
        self.button_Xpath_Parse.clicked.connect(self.Button_Xpath_Parse)

    def retranslateUi(self, Asyu17_Xpath_Helper):
        """Set the window title and the visible text of buttons and labels."""
        _translate = QtCore.QCoreApplication.translate
        Asyu17_Xpath_Helper.setWindowTitle(_translate("Asyu17_Xpath_Helper", "Asyu17 Xpath Helper"))
        self.button_Xpath_Parse.setText(_translate("Asyu17_Xpath_Helper", "Xpath Parse"))
        self.label.setText(_translate("Asyu17_Xpath_Helper", "HTML Code:"))
        self.label_2.setText(_translate("Asyu17_Xpath_Helper", "Result:"))
        self.label_3.setText(_translate("Asyu17_Xpath_Helper", "Xpath Syntax:"))
        self.button_Get_html.setText(_translate("Asyu17_Xpath_Helper", "Get Html"))
        self.label_4.setText(_translate("Asyu17_Xpath_Helper", "Web Site:"))

    def Button_Get_Html(self):
        """Fetch the page named in ``text_Web_Site`` and show its HTML.

        The decoded response body (or the error message, if the request
        fails) is written to ``text_HTML_Code``. An empty URL produces a
        hint instead of a request.
        """
        headers = {
            'User-Agent': (
                'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                'AppleWebKit/537.36 (KHTML, like Gecko) '
                'Chrome/80.0.3970.5 Safari/537.36'
            )
        }
        url = self.text_Web_Site.toPlainText().strip()
        if not url:
            self.text_HTML_Code.setPlainText('網址不能爲空!')
            return

        # Prepend a scheme whenever the user omitted one (not only for
        # addresses that happen to start with "w"), and leave URLs that
        # already carry a scheme untouched.
        if '://' not in url:
            url = 'http://' + url

        try:
            # The context manager closes the session's connection pool; the
            # timeout keeps an unresponsive server from freezing the GUI
            # forever. NOTE: verify=False disables TLS certificate checks
            # (kept from the original behaviour of this debugging tool).
            with requests.Session() as session:
                response = session.get(
                    url=url, headers=headers, verify=False, timeout=30
                )
                page_source = response.content.decode('utf-8', 'ignore')
        except Exception as e:
            # Slot boundary: show the failure to the user instead of crashing.
            self.text_HTML_Code.setPlainText(str(e))
        else:
            # Output the returned content in text_HTML_Code.
            self.text_HTML_Code.setPlainText(page_source)

    def Button_Xpath_Parse(self):
        """Evaluate the XPath in ``text_Xpath_Syntax`` against ``text_HTML_Code``.

        Each match is appended to ``text_Result`` under a numbered separator
        line. If parsing or evaluation fails, the error message is shown in
        ``text_Result`` instead.
        """
        self.text_Result.document().clear()
        xpath_syntax = self.text_Xpath_Syntax.toPlainText()
        html_code = self.text_HTML_Code.toPlainText()

        try:
            # Parsing is inside the try block so that a parser error is
            # reported in the result pane rather than escaping the slot.
            html = etree.HTML(html_code)
            results = html.xpath(xpath_syntax)
        except Exception as e:
            self.text_Result.setPlainText(str(e))
            return

        # XPath functions such as count(), string() or boolean() return a
        # single scalar rather than a list of matches.
        if not isinstance(results, list):
            results = [results]

        for num, result in enumerate(results):
            self.text_Result.append(f"{'-' * 60}這裏是第 {num} 個")
            # A match is either an element (show its text) or a plain value
            # such as an attribute or text() string.
            if hasattr(result, 'text'):
                # Elements without leading text have text == None; show an
                # empty entry instead of failing the whole listing.
                entry = result.text or ''
            else:
                entry = str(result)
            self.text_Result.append(entry)


if __name__ == '__main__':
    # Every PyQt5 program must create an application object; sys.argv hands
    # it the list of command-line arguments.
    app = QtWidgets.QApplication(sys.argv)
    # QWidget is the base class of all PyQt5 user-interface objects. The
    # default constructor is used here, so the widget has no parent.
    w = QtWidgets.QWidget()
    ui = Ui_Asyu17_Xpath_Helper()
    ui.setupUi(w)
    w.show()
    # exec_() carries a trailing underscore because "exec" was a Python
    # keyword. Passing its return value to sys.exit() ensures the program
    # exits cleanly.
    sys.exit(app.exec_())