pyqt實現CSDN博客訪客數編程自動化增長GUI


前些天在逛論壇的時候忽然發現了一篇文章,標題是通過編程自動化提升自己博客的訪問量。我想了一下,覺得可以用學過的Python的簡單知識來實現這一目的。主要原理就是BeautifulSoup+urllib的組合:通過BS解析網頁,獲取目錄,然後深入,獲取文章的url,再通過urllib.request模塊嘗試連接CSDN的服務器。說幹就幹:

from bs4 import BeautifulSoup

import urllib.request
import urllib.parse
import sys
import time


# Logging helper used while the script runs.
def LOG(*argv):
    """Write one diagnostic line to standard error.

    Every positional argument is converted with ``str`` and the pieces are
    joined with a single space, followed by a newline. ``LOG()`` with no
    arguments writes an empty line.
    """
    # sys.stderr.write() takes exactly one string; unpacking *argv into it
    # raised TypeError whenever zero or more than one argument was passed.
    sys.stderr.write(' '.join(str(arg) for arg in argv))
    sys.stderr.write('\n')

class Grab():
    """Download blog pages and extract links/metadata from their HTML.

    The parsing methods look for the markup names used by the target blog
    pages: elements with class ``link_title`` / ``link_view`` and the
    pagination container with id ``papelist``.
    """
    # URL passed to the most recent GetPage() call (used in log messages).
    url = ''
    # BeautifulSoup tree built by the most recent parsing call.
    soup = None

    # Read the source of the given page and return it.
    def GetPage(self, url, timeout=10):
        """Fetch ``url`` and return the raw response body.

        Args:
            url: Address to request.
            timeout: Seconds to wait on the connection before giving up, so
                a stalled server cannot block the caller forever.

        Returns:
            The response body as ``bytes``, or ``None`` when the request
            could not be completed.

        Note:
            As in the original script, an empty response body prints a
            message and terminates the process through ``sys.exit()``.
        """
        self.url = url
        LOG('input url is: %s' % self.url)
        try:
            req = urllib.request.Request(url, headers={'User-Agent' : "Magic Browser"})
            # The context manager closes the connection even if read() fails.
            with urllib.request.urlopen(req, timeout=timeout) as page:
                tem = page.read()
        except Exception as err:
            # Best-effort fetch: report the failure instead of hiding it, but
            # let KeyboardInterrupt/SystemExit propagate (a bare except did not).
            LOG('GetPage failed for %s: %s' % (self.url, err))
            return None
        if not tem:
            print('GetPage failed!')
            sys.exit()
        return tem

    # Collect the article URLs found on a listing page.
    def ExtractInfo(self,buf):
        """Return the ``href`` values of article links on a listing page.

        Args:
            buf: Page source as returned by GetPage(); may be ``None``.

        Returns:
            A list of href strings exactly as they appear in the page. The
            list is empty when ``buf`` is empty or cannot be parsed, so
            callers can always iterate the result.
        """
        if not buf:
            LOG('soup failed in ExtractInfo :%s' % self.url)
            return []
        try:
            self.soup = BeautifulSoup(buf,'html.parser')
        except Exception:
            LOG('soup failed in ExtractInfo :%s' % self.url)
            return []
        links = []
        for item in self.soup.find_all(attrs={'class':'link_title'}):
            # href=True skips anchors without an href, which would otherwise
            # raise KeyError on lookup.
            for anchor in item.find_all('a', href=True):
                links.append(anchor['href'])
        return links

    # Collect the URLs of all article-listing pages.
    def GetPageUrl(self,buf):
        """Return the set of listing-page URLs linked from the pagination bar.

        Args:
            buf: Page source as returned by GetPage(); may be ``None``.

        Returns:
            A set of absolute URLs, or ``None`` when there is no page source
            or no pagination element (the blog has a single listing page).
        """
        if not buf:
            return None
        pages = set()
        self.soup = BeautifulSoup(buf,'html.parser')
        pageInfo = self.soup.find(attrs={'id':'papelist'})
        # Only one listing page exists when the pagination bar is absent.
        if pageInfo is None:
            return None
        for link in pageInfo.find_all('a', href=True):
            # urljoin avoids the "//" that plain concatenation produced for
            # hrefs starting with "/", and leaves absolute hrefs untouched.
            pages.add(urllib.parse.urljoin('http://blog.csdn.net/', link['href']))
        return pages

    # Read the view counter and title of the article currently visited.
    def GetCurViewerPoint(self,buf):
        """Return ``"<title> <view counter text>"`` for an article page.

        Args:
            buf: Page source as returned by GetPage(); may be ``None``.

        Returns:
            The stripped title text and the view-counter text separated by a
            space. A missing element contributes an empty string; an empty
            ``buf`` yields ``''``.
        """
        if not buf:
            return ''
        self.soup = BeautifulSoup(buf,'html.parser')
        pointobj = self.soup.find(attrs={'class':'link_view'})
        title = self.soup.find(attrs={'class':'link_title'})
        # find() returns None when nothing matches; guard before get_text().
        title_text = title.get_text().strip() if title is not None else ''
        point_text = pointobj.get_text() if pointobj is not None else ''
        return title_text+' '+point_text

grab = Grab()

# buf holds the raw source of the entry page (None when the request failed).
buf = grab.GetPage('http://blog.csdn.net/peihaozhu')

# pages collects the URLs of the article-listing pages.
pages = ['http://blog.csdn.net/peihaozhu',]

# Start from the entry page; a blog with few articles has a single listing
# page, in which case GetPageUrl() returns nothing.
tem = grab.GetPageUrl(buf) if buf else None
if tem:
    # Skip listing pages that are already queued so none is fetched twice.
    pages += [page for page in sorted(tem) if page not in pages]

# articles collects the URLs of every article found.
articles=set()

for page in pages:
    buf = grab.GetPage(page)
    if not buf:
        # The listing page could not be downloaded; move on to the next one.
        continue
    links = grab.ExtractInfo(buf)
    # ExtractInfo() may return None on failure; treat that as "no links".
    for url in links or ():
        # urljoin avoids the "//" that plain concatenation produced for hrefs
        # starting with "/".
        articles.add(urllib.parse.urljoin('http://blog.csdn.net/', url))

# Visit each article through urllib.request.
for url in articles:
    for i in range(1,11):
        buf=grab.GetPage(url)
        if buf:
            print('第'+str(i)+'次訪問 '+grab.GetCurViewerPoint(buf))
        # Pause 300 ms after every request.
        time.sleep(0.3)

用PyQt5將程序的大體控件擺放完成了:

將PyQt生成的ui文件直接通過命令轉換成.py文件:

pyuic5.bat -o layout.py untitled.ui

#代碼如下
# -*- coding: utf-8 -*-

# Form implementation generated from reading ui file 'untitled.ui'
#
# Created by: PyQt5 UI code generator 5.5.1
#
# WARNING! All changes made in this file will be lost!

from PyQt5 import QtCore, QtGui, QtWidgets

class Ui_Form(object):
    """Widget layout for the form described by 'untitled.ui'.

    Every child widget is positioned with a fixed geometry and stored as an
    attribute on this object.
    """

    def setupUi(self, Form):
        """Create the child widgets on ``Form`` and connect the exit button."""

        def place(widget, name, x, y, width, height):
            # Apply the fixed geometry first, then the Qt object name.
            widget.setGeometry(QtCore.QRect(x, y, width, height))
            widget.setObjectName(name)
            return widget

        Form.setObjectName("Form")

        # Caption and input box for the user name.
        self.label = place(QtWidgets.QLabel(Form), "label", 20, 30, 81, 21)
        self.username = place(
            QtWidgets.QPlainTextEdit(Form), "username", 110, 20, 341, 41)

        # Caption and input box for the repeat count, then the start button.
        self.label_2 = place(QtWidgets.QLabel(Form), "label_2", 30, 80, 61, 31)
        self.times = place(
            QtWidgets.QPlainTextEdit(Form), "times", 110, 80, 151, 41)
        self.beginBtn = place(
            QtWidgets.QPushButton(Form), "beginBtn", 300, 80, 61, 41)

        # Progress bar, created with an initial value of 24.
        progress = QtWidgets.QProgressBar(Form)
        self.progressBar = progress
        progress.setGeometry(QtCore.QRect(30, 350, 461, 41))
        progress.setProperty("value", 24)
        progress.setObjectName("progressBar")

        self.listView = place(
            QtWidgets.QListView(Form), "listView", 30, 180, 431, 151)

        # Status label, initially empty.
        status = QtWidgets.QLabel(Form)
        self.info = status
        status.setGeometry(QtCore.QRect(30, 140, 421, 31))
        status.setText("")
        status.setObjectName("info")

        self.exitBtn = place(
            QtWidgets.QPushButton(Form), "exitBtn", 390, 80, 61, 41)

        self.retranslateUi(Form)
        # The exit button closes the form.
        self.exitBtn.clicked.connect(Form.close)
        QtCore.QMetaObject.connectSlotsByName(Form)

    def retranslateUi(self, Form):
        """Set the window title and the visible text of labels and buttons."""
        tr = QtCore.QCoreApplication.translate
        Form.setWindowTitle(tr("Form", "Form"))
        captions = (
            (self.label, " CSDN用戶名"),
            (self.label_2, "設置次數"),
            (self.beginBtn, "Start"),
            (self.exitBtn, "Exit"),
        )
        for widget, caption in captions:
            widget.setText(tr("Form", caption))

遇到的一些問題

在這次編寫GUI的過程中,我遇到了以前沒有遇到過的問題。

以往的時候,如上篇文章,通過Python的QR模塊生成QR二維碼,因爲邏輯非常簡單,只是單純地將所需要轉換的數據變換成相應的0、1二進制碼,然後放到圖片中的相應位置上,所以不會花費太多的時間,邏輯部分與界面部分直接寫在一起也沒有問題。

這次剛開始的時候,我也沒注意,還是直接寫在了一起。由於牽扯到了url網絡連接部分,所以不可避免地出現了阻塞現象。幾乎在所有的GUI設計中,如果長時間出現阻塞、沒有響應的情況,界面都會進入未響應狀態。所以我想到了Android開發中相當常規的子線程與UI線程通信方式,也就是Handler的使用。在PyQt中也有類似的機制,即Qt的核心機制——信號槽機制。更多的內容可以看我另外的文章,我會在其中詳細介紹。

下面是我修改完成後的代碼,可以順利完成我預設的功能:

import urllib.request
import urllib.parse
import time
from PyQt5 import QtCore, QtGui, QtWidgets
from PyQt5.QtCore import pyqtSignal
from bs4 import BeautifulSoup

# Page-fetching and page-parsing helper class.
class Grab():
    """Download blog pages and extract links/metadata from their HTML.

    The parsing methods look for the markup names used by the target blog
    pages: elements with class ``link_title`` / ``link_view`` and the
    pagination container with id ``papelist``.
    """
    # URL passed to the most recent GetPage() call.
    url = ''
    # BeautifulSoup tree built by the most recent parsing call.
    soup = None

    # Read the source of the given page and return it.
    def GetPage(self, url, timeout=10):
        """Fetch ``url`` and return the raw response body.

        Args:
            url: Address to request.
            timeout: Seconds to wait on the connection before giving up, so
                a stalled server cannot block the calling thread forever.

        Returns:
            The response body as ``bytes``, or ``None`` when the request
            failed or the body was empty.
        """
        self.url = url
        try:
            req = urllib.request.Request(url, headers={'User-Agent' : "Magic Browser"})
            # The context manager closes the connection even if read() fails.
            with urllib.request.urlopen(req, timeout=timeout) as page:
                tem = page.read()
        except Exception:
            # Best-effort fetch; unlike a bare except this lets
            # KeyboardInterrupt/SystemExit propagate.
            return None
        if not tem:
            # The original called sys.exit() here, but `sys` is only imported
            # under the __main__ guard (NameError when imported as a module).
            # Report the empty page as a failed fetch instead.
            print('GetPage failed!')
            return None
        return tem

    def ExtractInfo(self,buf):
        """Return the article links and titles found on a listing page.

        Args:
            buf: Page source as returned by GetPage(); may be ``None``.

        Returns:
            A ``(links, titles)`` tuple of two lists of equal length, where
            ``links[i]`` is the absolute URL belonging to ``titles[i]``. Both
            lists are empty when ``buf`` is empty or cannot be parsed, so the
            result can always be unpacked.
        """
        links = []
        titles = []
        if not buf:
            return links,titles
        try:
            self.soup = BeautifulSoup(buf,'html.parser')
        except Exception:
            return links,titles
        for item in self.soup.find_all(attrs={'class':'link_title'}):
            # Take the first anchor that has an href, and record title and
            # link together so the two lists stay index-aligned.
            anchor = item.find('a', href=True)
            if anchor is None:
                continue
            titles.append(item.get_text().strip())
            links.append(urllib.parse.urljoin('http://blog.csdn.net', anchor['href']))
        return links,titles

    def GetPageUrl(self,buf):
        """Return the set of listing-page URLs linked from the pagination bar.

        Args:
            buf: Page source as returned by GetPage(); may be ``None``.

        Returns:
            A set of absolute URLs, or ``None`` when there is no page source
            or no pagination element (the blog has a single listing page).
        """
        if not buf:
            return None
        pages = set()
        self.soup = BeautifulSoup(buf,'html.parser')
        pageInfo = self.soup.find(attrs={'id':'papelist'})
        # Only one listing page exists when the pagination bar is absent.
        if pageInfo is None:
            return None
        for link in pageInfo.find_all('a', href=True):
            # urljoin avoids the "//" that plain concatenation produced for
            # hrefs starting with "/".
            pages.add(urllib.parse.urljoin('http://blog.csdn.net/', link['href']))
        return pages

    def GetCurViewerPoint(self,buf):
        """Return ``"<title> <view counter text>"`` for an article page.

        Args:
            buf: Page source as returned by GetPage(); may be ``None``.

        Returns:
            The stripped title text and the view-counter text separated by a
            space. A missing element contributes an empty string; an empty
            ``buf`` yields ``''``.
        """
        if not buf:
            return ''
        self.soup = BeautifulSoup(buf,'html.parser')
        pointobj = self.soup.find(attrs={'class':'link_view'})
        title = self.soup.find(attrs={'class':'link_title'})
        # find() returns None when nothing matches; guard before get_text().
        title_text = title.get_text().strip() if title is not None else ''
        point_text = pointobj.get_text() if pointobj is not None else ''
        return title_text+' '+point_text



# User-interface class.
class Ui_Form(object):
    """Form layout plus the glue between the widgets and the worker thread.

    The widgets use fixed geometry. A ``MyThread`` instance does the network
    work and reports back through its ``sinOut`` signal, which is handled by
    :meth:`handler`.
    """

    def setupUi(self, Form):
        """Create the child widgets on ``Form`` and connect signals."""
        Form.setObjectName("Form")
        self.label = QtWidgets.QLabel(Form)
        self.label.setGeometry(QtCore.QRect(20, 30, 81, 21))
        self.label.setObjectName("label")
        self.username = QtWidgets.QPlainTextEdit(Form)
        self.username.setGeometry(QtCore.QRect(110, 20, 341, 41))
        self.username.setObjectName("username")
        self.label_2 = QtWidgets.QLabel(Form)
        self.label_2.setGeometry(QtCore.QRect(30, 80, 61, 31))
        self.label_2.setObjectName("label_2")
        self.times = QtWidgets.QPlainTextEdit(Form)
        self.times.setGeometry(QtCore.QRect(110, 80, 151, 41))
        self.times.setObjectName("times")
        self.beginBtn = QtWidgets.QPushButton(Form)
        self.beginBtn.setGeometry(QtCore.QRect(300, 80, 61, 41))
        self.beginBtn.setObjectName("beginBtn")
        self.progressBar = QtWidgets.QProgressBar(Form)
        self.progressBar.setGeometry(QtCore.QRect(30, 350, 461, 41))
        self.progressBar.setProperty("value", 0)
        self.progressBar.setObjectName("progressBar")
        self.listWidget = QtWidgets.QListWidget(Form)
        self.listWidget.setGeometry(QtCore.QRect(30, 180, 431, 151))
        self.listWidget.setObjectName("listWidget")
        self.info = QtWidgets.QLabel(Form)
        self.info.setGeometry(QtCore.QRect(30, 140, 421, 31))
        self.info.setText("")
        self.info.setObjectName("info")
        self.exitBtn = QtWidgets.QPushButton(Form)
        self.exitBtn.setGeometry(QtCore.QRect(390, 80, 61, 41))
        self.exitBtn.setObjectName("exitBtn")

        # Worker thread; its signal delivers updates to handler().
        self.thread=MyThread()
        self.thread.sinOut.connect(self.handler)

        self.retranslateUi(Form)
        self.exitBtn.clicked.connect(Form.close)
        self.beginBtn.pressed.connect(self.mainFunc)
        QtCore.QMetaObject.connectSlotsByName(Form)

    def handler(self,type,text,content):
        """Apply one update sent by the worker thread.

        Args:
            type: Update kind: 1 adds ``content`` to the list widget, 2 sets
                the progress bar from ``text``, 3 shows ``text`` in the info
                label. (The name shadows the builtin but is kept because it
                is part of the existing slot signature.)
            text: Progress percentage as a string (kind 2) or the status
                message (kind 3).
            content: Collection of list entries (used by kind 1).
        """
        if type == 1:
            # Sorted so the entries appear in a stable order; the incoming
            # collection is a set and has no defined ordering.
            self.listWidget.addItems(sorted(content))
        elif type == 2:
            self.progressBar.setProperty("value", float(text))
        elif type == 3:
            self.info.setText(text)


    def mainFunc(self):
        """Validate the inputs and start the worker thread."""
        # Ignore the button while a run is in progress; otherwise setVal()
        # would change the worker's parameters underneath the running loop.
        if self.thread.isRunning():
            return
        username = self.username.toPlainText().strip()
        times = self.times.toPlainText().strip()
        if not (username and times):
            return
        # The worker converts `times` with int(); reject bad input here
        # rather than letting the conversion fail inside the thread.
        try:
            int(times)
        except ValueError:
            self.info.setText('Times must be an integer')
            return
        # Start each run from a clean display.
        self.listWidget.clear()
        self.progressBar.setProperty("value", 0)
        self.thread.setVal(username,times)
        self.thread.start()

    def retranslateUi(self, Form):
        """Set the window title and the visible text of labels and buttons."""
        _translate = QtCore.QCoreApplication.translate
        Form.setWindowTitle(_translate("Form", "Blog做弊器"))
        self.label.setText(_translate("Form", " CSDN用戶名"))
        self.label_2.setText(_translate("Form", "設置次數"))
        self.beginBtn.setText(_translate("Form", "Start"))
        self.exitBtn.setText(_translate("Form", "Exit"))


# Worker thread.
class MyThread(QtCore.QThread):
    """Background worker that collects article links and requests them.

    Results are reported through ``sinOut`` as ``(kind, text, content)``:
    kind 1 carries the set of "link title " strings in ``content``, kind 2
    carries the progress percentage in ``text``, kind 3 carries the string
    returned by ``Grab.GetCurViewerPoint`` in ``text``.
    """
    sinOut = pyqtSignal(int,str,set)
    # Class-level default kept so the attribute always exists; every instance
    # and every run binds its own set, so results are not shared or carried
    # over between runs.
    articles = set()

    def __init__(self):
        super(MyThread,self).__init__()
        self.username=''
        self.times=''
        self.articles = set()

    def setVal(self,username,times):
        """Store the blog user name and the per-article request count."""
        self.username=username
        self.times=times

    def run(self):
        """Collect the article list, then request every article ``times`` times."""
        try:
            visits = int(self.times)
        except (TypeError, ValueError):
            # Report bad input through the status channel instead of letting
            # the exception escape from the thread.
            self.sinOut.emit(3,'invalid times: %s' % self.times,set())
            return

        grab = Grab()
        # Quote the user name so characters that are not URL-safe cannot
        # produce a malformed address.
        home = 'http://blog.csdn.net/'+urllib.parse.quote(self.username)
        pages = [home,]
        buf = grab.GetPage(home)
        tem = grab.GetPageUrl(buf) if buf else None
        if tem:
            # Skip listing pages that are already queued.
            pages += [page for page in sorted(tem) if page not in pages]

        links = []
        titles = []
        for page in pages:
            buf = grab.GetPage(page)
            if not buf:
                # Listing page could not be downloaded; try the next one.
                continue
            found = grab.ExtractInfo(buf)
            if not found:
                # ExtractInfo() may return None when parsing fails.
                continue
            link,title = found
            links+=link
            titles+=title

        # Fresh set per run: previously the class-level set kept the links of
        # earlier runs, so they were requested again.
        self.articles = set(links)
        # One "link title " entry per article, same text as before.
        content = {'%s %s ' % pair for pair in zip(links,titles)}

        # Emit the signals.
        self.sinOut.emit(1,'',content)
        sumRes = len(self.articles)*visits
        cur = 1
        for url in self.articles:
            for i in range(0,visits):
                buf=grab.GetPage(url)
                self.sinOut.emit(2,str(cur/sumRes*100),content)
                if buf:
                    # Only parse pages that were actually downloaded.
                    self.sinOut.emit(3,grab.GetCurViewerPoint(buf),content)
                cur+=1
                time.sleep(0.1)


if __name__ == '__main__':
    # Imported at module scope (not inside a function) so the name `sys` is
    # also visible to the rest of this file when it runs as a script.
    import sys

    # Build the application object and a bare top-level window.
    application = QtWidgets.QApplication(sys.argv)
    window = QtWidgets.QWidget()

    # Lay the form out on the window; keep a reference to the form object.
    form = Ui_Form()
    form.setupUi(window)
    window.show()

    # Run the event loop and pass its result on as the process exit status.
    exit_status = application.exec_()
    sys.exit(exit_status)

由於使用了designer默認的絕對佈局方式,代碼比較雜亂。總的來說也就分3個模塊:

  1. 網頁獲取、解析工作類 Grab

  2. 界面佈局、實時數據展示類 Ui_Form

  3. 邏輯控制、監控與溝通類 MyThread

各個模塊相互合作,實現功能。

通過pyinstaller(曾嘗試使用py2exe模塊生成,沒有成功)將py源代碼生成了win平臺可用的exe可執行程序,發佈在戳這裏 ,歡迎大家交流。

相關文章
相關標籤/搜索