【自制實用小工具】——一、Xpath解析器

【自制實用小工具】——一、Xpath解析器

由於 JS 腳本的影響,咱們用程序請求獲得的數據經常與瀏覽器中顯示的數據不同;而 Chrome 插件 XPath Helper 不能調試本地網頁,因而有了製作一個 XPath 解析器的想法。(粗略嘗試了一下,沒有問題;你們要是發現 bug 的話,記得評論告訴我啊~)
工具:

  1. PyQt5 庫
  2. Qt designer
  3. sys 庫
  4. requests 庫
  5. lxml 庫

步驟:

(一)用Qt designer設計界面

界面

(二)將.ui文件轉換爲.py文件

有關(一)、(二)部分的教程能夠參考:https://www.jb51.net/article/...

(三)連接按鈕

將如下代碼添加到 setupUi 方法的末尾:

# Wire each button's clicked signal to its handler method
        self.button_Get_html.clicked.connect(self.Button_Get_Html)
        self.button_Xpath_Parse.clicked.connect(self.Button_Xpath_Parse)

(四)按鈕事件

如下分別是按鈕 Get Html 和按鈕 Xpath Parse 的點擊事件處理代碼:

def Button_Get_Html(self):
        """Download the page named in the URL box and show its source.

        Reads the address from ``self.text_Web_Site``, fetches it with
        ``requests`` and writes the decoded body into ``self.text_HTML_Code``.
        An empty address or any failure during the request is reported in
        that same pane instead of being raised.
        """
        url = self.text_Web_Site.toPlainText().strip()
        if not url:
            self.text_HTML_Code.setPlainText('網址不能爲空!')
            return

        # Any address typed without a scheme (not only ones starting with
        # "www") needs one, otherwise requests rejects it.
        if '://' not in url:
            url = 'http://' + url

        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3970.5 Safari/537.36'
        }
        # Broad catch on purpose: this is a GUI slot, so every failure
        # (DNS, TLS, timeout, bad URL, ...) is shown to the user as text.
        try:
            # The context manager closes the session's connections afterwards.
            with requests.session() as session:
                # NOTE: verify=False skips TLS certificate checks; acceptable
                # for a debugging tool, unsafe for anything else.
                # The timeout keeps a dead host from freezing the GUI thread.
                response = session.get(url=url, headers=headers, verify=False, timeout=10)
                res = response.content.decode('utf-8', 'ignore')
        except Exception as e:
            self.text_HTML_Code.setPlainText(str(e))
        else:
            # Show the downloaded source in text_HTML_Code
            self.text_HTML_Code.setPlainText(res)

    def Button_Xpath_Parse(self):
        """Evaluate the XPath expression against the HTML pane and list the results.

        Clears ``self.text_Result``, parses the text of ``self.text_HTML_Code``
        with ``etree.HTML`` and runs the expression from
        ``self.text_Xpath_Syntax`` on it. Each result is printed under a
        numbered separator line; parse or XPath errors are shown as text in
        the result pane instead of being raised.
        """
        self.text_Result.document().clear()

        xpath_syntax = self.text_Xpath_Syntax.toPlainText()
        html_code = self.text_HTML_Code.toPlainText()
        # Parsing sits inside the try as well, so that a parser error is
        # reported in the result pane rather than escaping the slot.
        try:
            html = etree.HTML(html_code)
            if html is None:
                # No document could be built (e.g. the HTML pane is empty).
                self.text_Result.setPlainText('HTML 代碼爲空或無法解析!')
                return
            results = html.xpath(xpath_syntax)
        except Exception as e:
            self.text_Result.setPlainText(str(e))
            return

        # Expressions such as count(...) or string(...) yield a single
        # number/boolean/string instead of a list; treat it as one result.
        if not isinstance(results, list):
            results = [results]

        for num, result in enumerate(results):
            self.text_Result.append('-'*60+'這裏是第 '+str(num)+' 個')
            # A result is either an element (show its text, which may be
            # missing) or a plain value such as an attribute/text string.
            if not isinstance(result, str) and hasattr(result, 'text'):
                text = result.text or ''
            else:
                text = str(result)
            self.text_Result.append(text)

(五)初始化界面

if __name__ == '__main__':
    # A QApplication has to exist before any widget is created; it receives
    # the command-line arguments.
    app = QtWidgets.QApplication(sys.argv)

    # A parent-less QWidget serves as the top-level window; the UI class
    # fills it with the tool's controls.
    w = QtWidgets.QWidget()
    ui = Ui_Asyu17_Xpath_Helper()
    ui.setupUi(w)
    w.show()

    # Run the Qt event loop, then hand its return value to sys.exit() so the
    # process ends with that status.
    exit_code = app.exec_()
    sys.exit(exit_code)

結果展現:

測試無問題後,可以使用 pyinstaller 將代碼打包成可執行文件~

結果展現
完整代碼:

from PyQt5 import QtCore, QtGui, QtWidgets
import sys
import requests
from lxml import etree

# Silence urllib3 warnings process-wide. The page download in this file passes
# verify=False, which presumably would otherwise emit a warning on every request.
requests.packages.urllib3.disable_warnings()

class Ui_Asyu17_Xpath_Helper(object):
    """Qt Designer style UI class for a small XPath testing tool.

    ``setupUi`` builds the widgets on a host widget. The two ``Button_*``
    slots download a page into the HTML pane and evaluate an XPath
    expression against that pane's contents.
    """

    # Seconds to wait on the server, so a dead host cannot block the GUI
    # thread indefinitely.
    REQUEST_TIMEOUT = 10

    @staticmethod
    def _arial_font(point_size):
        """Return an Arial ``QFont`` with the given point size."""
        font = QtGui.QFont()
        font.setFamily("Arial")
        font.setPointSize(point_size)
        return font

    @staticmethod
    def _normalize_url(raw_url):
        """Strip *raw_url* and prepend ``http://`` when it has no scheme.

        Returns an empty string when nothing but whitespace was given.
        """
        url = raw_url.strip()
        if url and '://' not in url:
            url = 'http://' + url
        return url

    @staticmethod
    def _as_result_list(results):
        """Wrap a scalar XPath result (number, boolean, string) in a list."""
        if isinstance(results, list):
            return results
        return [results]

    @staticmethod
    def _result_to_text(result):
        """Return the text to display for a single XPath result.

        Objects exposing ``.text`` (elements) yield that text, or an empty
        string when it is missing; every other value is passed through
        ``str()``.
        """
        if not isinstance(result, str) and hasattr(result, 'text'):
            return result.text or ''
        return str(result)

    def setupUi(self, Asyu17_Xpath_Helper):
        """Create all widgets on *Asyu17_Xpath_Helper* and connect the buttons.

        Args:
            Asyu17_Xpath_Helper: the host widget that becomes the parent of
                every control created here.
        """
        Asyu17_Xpath_Helper.setObjectName("Asyu17_Xpath_Helper")
        Asyu17_Xpath_Helper.resize(969, 905)

        # "Xpath Parse" button (bottom right).
        self.button_Xpath_Parse = QtWidgets.QPushButton(Asyu17_Xpath_Helper)
        self.button_Xpath_Parse.setGeometry(QtCore.QRect(830, 860, 75, 31))
        self.button_Xpath_Parse.setObjectName("button_Xpath_Parse")

        # Captions above the two large panes.
        self.label = QtWidgets.QLabel(Asyu17_Xpath_Helper)
        self.label.setGeometry(QtCore.QRect(10, 10, 71, 16))
        self.label.setFrameShape(QtWidgets.QFrame.StyledPanel)
        self.label.setScaledContents(False)
        self.label.setObjectName("label")
        self.label_2 = QtWidgets.QLabel(Asyu17_Xpath_Helper)
        self.label_2.setGeometry(QtCore.QRect(490, 10, 51, 16))
        self.label_2.setFrameShape(QtWidgets.QFrame.StyledPanel)
        self.label_2.setScaledContents(False)
        self.label_2.setObjectName("label_2")

        # XPath expression row: caption plus an editable text box.
        self.label_3 = QtWidgets.QLabel(Asyu17_Xpath_Helper)
        self.label_3.setGeometry(QtCore.QRect(20, 860, 91, 31))
        self.label_3.setObjectName("label_3")
        self.text_Xpath_Syntax = QtWidgets.QTextBrowser(Asyu17_Xpath_Helper)
        self.text_Xpath_Syntax.setGeometry(QtCore.QRect(110, 860, 681, 31))
        self.text_Xpath_Syntax.setFont(self._arial_font(13))
        self.text_Xpath_Syntax.setReadOnly(False)
        self.text_Xpath_Syntax.setObjectName("text_Xpath_Syntax")

        # URL row: "Get Html" button, editable text box and caption.
        self.button_Get_html = QtWidgets.QPushButton(Asyu17_Xpath_Helper)
        self.button_Get_html.setGeometry(QtCore.QRect(830, 820, 75, 31))
        self.button_Get_html.setObjectName("button_Get_html")
        self.text_Web_Site = QtWidgets.QTextBrowser(Asyu17_Xpath_Helper)
        self.text_Web_Site.setGeometry(QtCore.QRect(110, 820, 681, 31))
        self.text_Web_Site.setFont(self._arial_font(13))
        self.text_Web_Site.setReadOnly(False)
        self.text_Web_Site.setObjectName("text_Web_Site")
        self.label_4 = QtWidgets.QLabel(Asyu17_Xpath_Helper)
        self.label_4.setGeometry(QtCore.QRect(20, 820, 91, 31))
        self.label_4.setObjectName("label_4")

        # Container holding the HTML pane and the result pane side by side.
        self.layoutWidget = QtWidgets.QWidget(Asyu17_Xpath_Helper)
        self.layoutWidget.setGeometry(QtCore.QRect(10, 30, 951, 781))
        self.layoutWidget.setObjectName("layoutWidget")
        self.horizontalLayout = QtWidgets.QHBoxLayout(self.layoutWidget)
        self.horizontalLayout.setContentsMargins(0, 0, 0, 0)
        self.horizontalLayout.setObjectName("horizontalLayout")

        self.text_HTML_Code = QtWidgets.QTextBrowser(self.layoutWidget)
        self.text_HTML_Code.setEnabled(True)
        self.text_HTML_Code.setFont(self._arial_font(12))
        self.text_HTML_Code.setMouseTracking(False)
        self.text_HTML_Code.setTabletTracking(False)
        self.text_HTML_Code.setReadOnly(False)
        self.text_HTML_Code.setObjectName("text_HTML_Code")
        self.horizontalLayout.addWidget(self.text_HTML_Code)

        self.text_Result = QtWidgets.QTextBrowser(self.layoutWidget)
        self.text_Result.setFont(self._arial_font(12))
        self.text_Result.setReadOnly(False)
        # Named like every other widget in this form.
        self.text_Result.setObjectName("text_Result")
        self.horizontalLayout.addWidget(self.text_Result)

        self.retranslateUi(Asyu17_Xpath_Helper)
        QtCore.QMetaObject.connectSlotsByName(Asyu17_Xpath_Helper)

        # Wire each button's clicked signal to its handler method
        self.button_Get_html.clicked.connect(self.Button_Get_Html)
        self.button_Xpath_Parse.clicked.connect(self.Button_Xpath_Parse)

    def retranslateUi(self, Asyu17_Xpath_Helper):
        """Set the window title and the captions of all labels and buttons."""
        _translate = QtCore.QCoreApplication.translate
        Asyu17_Xpath_Helper.setWindowTitle(_translate("Asyu17_Xpath_Helper", "Asyu17 Xpath Helper"))
        self.button_Xpath_Parse.setText(_translate("Asyu17_Xpath_Helper", "Xpath Parse"))
        self.label.setText(_translate("Asyu17_Xpath_Helper", "HTML Code:"))
        self.label_2.setText(_translate("Asyu17_Xpath_Helper", "Result:"))
        self.label_3.setText(_translate("Asyu17_Xpath_Helper", "Xpath Syntax:"))
        self.button_Get_html.setText(_translate("Asyu17_Xpath_Helper", "Get Html"))
        self.label_4.setText(_translate("Asyu17_Xpath_Helper", "Web Site:"))

    def Button_Get_Html(self):
        """Download the page named in the URL box and show its source.

        Reads the address from ``self.text_Web_Site``, fetches it with
        ``requests`` and writes the decoded body into ``self.text_HTML_Code``.
        An empty address or any failure during the request is reported in
        that same pane instead of being raised.
        """
        url = self._normalize_url(self.text_Web_Site.toPlainText())
        if not url:
            self.text_HTML_Code.setPlainText('網址不能爲空!')
            return

        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3970.5 Safari/537.36'
        }
        # Broad catch on purpose: this is a GUI slot, so every failure
        # (DNS, TLS, timeout, bad URL, ...) is shown to the user as text.
        try:
            # The context manager closes the session's connections afterwards.
            with requests.session() as session:
                # NOTE: verify=False skips TLS certificate checks; acceptable
                # for a debugging tool, unsafe for anything else.
                response = session.get(
                    url=url,
                    headers=headers,
                    verify=False,
                    timeout=self.REQUEST_TIMEOUT,
                )
                page_source = response.content.decode('utf-8', 'ignore')
        except Exception as e:
            self.text_HTML_Code.setPlainText(str(e))
        else:
            # Show the downloaded source in text_HTML_Code
            self.text_HTML_Code.setPlainText(page_source)

    def Button_Xpath_Parse(self):
        """Evaluate the XPath expression against the HTML pane and list the results.

        Clears ``self.text_Result``, parses the text of ``self.text_HTML_Code``
        with ``etree.HTML`` and runs the expression from
        ``self.text_Xpath_Syntax`` on it. Each result is printed under a
        numbered separator line; parse or XPath errors are shown as text in
        the result pane instead of being raised.
        """
        self.text_Result.document().clear()

        xpath_syntax = self.text_Xpath_Syntax.toPlainText()
        html_code = self.text_HTML_Code.toPlainText()
        # Parsing sits inside the try as well, so that a parser error is
        # reported in the result pane rather than escaping the slot.
        try:
            html = etree.HTML(html_code)
            if html is None:
                # No document could be built (e.g. the HTML pane is empty).
                self.text_Result.setPlainText('HTML 代碼爲空或無法解析!')
                return
            results = html.xpath(xpath_syntax)
        except Exception as e:
            self.text_Result.setPlainText(str(e))
            return

        # Scalar results (count(...), string(...), ...) are listed as a
        # single entry instead of being iterated or rejected.
        for num, result in enumerate(self._as_result_list(results)):
            self.text_Result.append('-'*60+'這裏是第 '+str(num)+' 個')
            self.text_Result.append(self._result_to_text(result))

if __name__ == '__main__':
    # A QApplication has to exist before any widget is created; it receives
    # the command-line arguments.
    app = QtWidgets.QApplication(sys.argv)

    # A parent-less QWidget serves as the top-level window; the UI class
    # fills it with the tool's controls.
    w = QtWidgets.QWidget()
    ui = Ui_Asyu17_Xpath_Helper()
    ui.setupUi(w)
    w.show()

    # Run the Qt event loop, then hand its return value to sys.exit() so the
    # process ends with that status.
    exit_code = app.exec_()
    sys.exit(exit_code)

==微信公衆號:==

小術快跑

相關文章
相關標籤/搜索