最近,項目中遇到了一個通過給定URL對網頁屏幕進行截圖的功能。之前的代碼中已經用Python的第三方庫實現了截圖功能,但在上線之後出現了一些bug,於是改bug的任務就落在了我的頭上。這方面我以前沒有接觸過,正好利用這個機會學習一下Python中的各種截圖方法。下面就是我要介紹的一些常用截圖功能的基本用法,希望能夠幫到你,也歡迎指出文中不足的地方。
PIL:Python Imaging Library,已經是Python平臺事實上的圖像處理標準庫了。PIL功能非常強大,但API卻非常簡單易用,不過只支持到Python 2.7。
Pillow:是PIL的一個派生分支,但現在已經發展成爲比PIL本身更具活力的圖像處理庫。目前最新版本是3.0.0。
在Debian/Ubuntu Linux下直接通過apt安裝:
$ sudo apt-get install python-imaging
Mac和其他版本的Linux可以直接使用easy_install或pip安裝,安裝前需要把編譯環境裝好:
$ sudo easy_install PIL
Windows平臺就去PIL官方網站下載exe安裝包,或使用pip安裝:
$ pip install pillow
from PIL import ImageGrab

# Take the snapshot with ImageGrab.grab() and keep the resulting image object.
screenshot = ImageGrab.grab()
# Destination path; the file name also determines the saved image format.
screenshot.save("1.png")
PyQt是一個創建GUI應用程序的工具包。它是Python編程語言和Qt庫的成功融合。Qt庫是目前最強大的庫之一。PyQt是由Phil Thompson開發的。PyQt是Qt庫的Python版本。PyQt3支持Qt1到Qt3,PyQt4支持Qt4。它的首次發佈也是在1998年,但是當時它叫PyKDE,因爲開始的時候SIP和PyQt沒有分開。PyQt是用SIP寫的。PyQt提供GPL版和商業版。
Windows下:
32位: http://sourceforge.net/projects/pyqt/files/PyQt4/PyQt-4.11.4/PyQt4-4.11.4-gpl-Py2.7-Qt4.8.7-x32.exe 64位: http://sourceforge.net/projects/pyqt/files/PyQt4/PyQt-4.11.4/PyQt4-4.11.4-gpl-Py2.7-Qt4.8.7-x64.exe
Linux下:
https://www.riverbankcomputing.com/software/pyqt/download
有些圖片較多或頁面較長的網站,截出來的圖會出現圖片未加載完畢或頁面截取不完整等情況,找了很多資料也沒解決。如果哪位小夥伴會的話,可以教一下我。
#!/usr/bin/env python
# -*- coding:utf-8 -*-
"""Load a web page in a QtWebKit view and save a rendering of it as shot.png."""
import sys
import time
import os.path

from PyQt4 import QtGui, QtCore, QtWebKit


class PageShotter(QtGui.QWidget):
    """Widget that loads one URL and renders the loaded page into a PNG file."""

    def __init__(self, url, parent=None):
        """Store the target URL.

        :param url: address of the page to capture.
        :param parent: optional parent widget, forwarded to QWidget.
        """
        QtGui.QWidget.__init__(self, parent)
        self.url = url

    def shot(self):
        """Start loading ``self.url``; ``savePage`` runs once loading finishes."""
        webView = QtWebKit.QWebView(self)
        webView.load(QtCore.QUrl(self.url))
        # Keep a reference to the page so savePage() can render it later.
        self.webPage = webView.page()
        self.connect(webView, QtCore.SIGNAL("loadFinished(bool)"), self.savePage)

    def savePage(self, finished):
        """Slot for ``loadFinished(bool)``: render the page and write shot.png.

        :param finished: flag delivered by the signal; a false value is
            reported as a page-load failure and nothing is rendered.

        The widget is closed afterwards in every case.
        """
        if finished:
            print("開始截圖!")
            size = self.webPage.mainFrame().contentsSize()
            print("頁面寬:%d,頁面高:%d" % (size.width(), size.height()))
            # Resize the viewport to the full content height so the whole page
            # is laid out; the extra 16 px of width is presumably scrollbar
            # allowance (TODO confirm).
            self.webPage.setViewportSize(QtCore.QSize(size.width() + 16, size.height()))
            # NOTE(review): the image is painted on without being filled first.
            img = QtGui.QImage(size, QtGui.QImage.Format_ARGB32)
            painter = QtGui.QPainter(img)
            self.webPage.mainFrame().render(painter)
            painter.end()
            fileName = "shot.png"
            if img.save(fileName):
                # The relative name is resolved against the current working
                # directory when saving, so report that same location rather
                # than the directory of this script.
                filePath = os.path.abspath(fileName)
                print("截圖完畢:%s" % filePath)
            else:
                print("截圖失敗")
        else:
            print("網頁加載失敗!")
        self.close()


if __name__ == "__main__":
    app = QtGui.QApplication(sys.argv)
    shotter = PageShotter("https://www.jd.com/")
    shotter.shot()
    sys.exit(app.exec_())
#!/usr/bin/python
# coding:utf-8
"""Check that a URL answers with HTTP 200, then render it to a PNG with QtWebKit."""
import sys
import os.path
import requests
import urlparse
import time
sys.path.append('../')
from PyQt4 import QtGui, QtCore, QtWebKit
from PyQt4.QtNetwork import QNetworkRequest


class WebStatus(object):
    """Probe a web page and report whether it answers with HTTP status 200."""

    def __init__(self, timeout, tries):
        '''
        Store the probing parameters and the request headers.

        :param timeout: seconds to wait for each request.
        :param tries: maximum number of request attempts.
        '''
        self.__timeout = timeout
        self.__tries = tries
        self.__headers = {
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
            "Accept-Language": "en-US,en;q=0.5",
            "Accept-Encoding": "gzip, deflate",
            "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:45.0) Gecko/20100101 Firefox/45.0"
        }

    def clear_url(self, url):
        '''
        Normalise *url* to ``scheme://netloc``.

        Only the part before the first comma is used, surrounding whitespace
        is stripped, ``http://`` is prepended when no http/https scheme is
        present, and trailing dots are removed.

        :param url: raw URL text.
        :return: the normalised URL, or None when *url* is empty.
        '''
        if not url:
            return None
        if ',' in url:
            url = url.split(',')[0]
        url = url.strip()
        if not url.startswith(('http://', 'https://')):
            url = 'http://' + url
        try:
            parse = urlparse.urlparse(url)
            url_new = parse.scheme + '://' + parse.netloc
        except ValueError:
            # Unparseable input: fall back to the text as it stands.
            url_new = url
        return url_new.rstrip('.')

    def isAccessible(self, url):
        '''
        Request the page and check for HTTP status 200.

        Every attempt counts against the ``tries`` budget, whether it raised
        or returned a non-200 status, so the loop always terminates.

        :param url: address to probe; it is normalised with ``clear_url`` first.
        :return: the normalised URL when a request returned status 200,
            otherwise False.
        '''
        url = self.clear_url(url)
        if not url:
            return False
        remaining = self.__tries
        while remaining > 0:
            remaining -= 1
            response = None
            try:
                response = requests.get(url=url, headers=self.__headers, timeout=self.__timeout)
                if response.status_code == 200:
                    return url
            except requests.RequestException:
                # A failed request simply uses up one attempt.
                pass
            finally:
                # Compare against None: a Response with an error status is
                # falsy, and it still has to be closed.
                if response is not None:
                    response.close()
        return False

    def __del__(self):
        # No-op finalizer, kept from the original class definition.
        pass


class PageShotter(QtGui.QWidget):
    """Widget that loads one URL and renders the loaded page into a PNG file."""

    def __init__(self, url, parent=None,pic_path = './pic_path'):
        """Prepare the output directory and compute the screenshot path.

        :param url: address of the page to capture.
        :param parent: optional parent widget, forwarded to QWidget.
        :param pic_path: directory for the PNG; created when it does not exist.
        """
        QtGui.QWidget.__init__(self, parent)
        if not os.path.exists(pic_path):
            os.mkdir(pic_path)
        self.request = QNetworkRequest()
        self.url = url
        # File name: "<local timestamp>_<host>.png".
        # NOTE(review): the timestamp contains ':' characters, which Windows
        # does not accept in file names.
        stamp = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
        host = urlparse.urlparse(self.url).netloc
        self.dir_path = os.path.join(pic_path, str(stamp) + '_' + host + '.png')

    def shot(self):
        """Start loading ``self.url``; ``savePage`` runs once loading finishes.

        :return: the path the screenshot will be written to.
        """
        webView = QtWebKit.QWebView(self)
        # NOTE(review): self.request is not passed to webView.load() below, so
        # as written this header is not attached to the page load.
        self.request.setRawHeader("User-Agent", "Mozilla/5.0 (X11; Linux x86_64; rv:45.0) Gecko/20100101 Firefox/45.0")
        webView.load(QtCore.QUrl(self.url))
        # Keep a reference to the page so savePage() can render it later.
        self.webPage = webView.page()
        self.connect(webView, QtCore.SIGNAL("loadFinished(bool)"), self.savePage)
        return self.dir_path

    def savePage(self, finished):
        """Slot for ``loadFinished(bool)``: render the page to ``self.dir_path``.

        :param finished: flag delivered by the signal; a false value is
            reported as a page-load failure and nothing is rendered.

        The widget is closed afterwards in every case.
        """
        if finished:
            size = self.webPage.mainFrame().contentsSize()
            # Resize the viewport to the full content height so the whole page
            # is laid out; the extra 16 px of width is presumably scrollbar
            # allowance (TODO confirm).
            self.webPage.setViewportSize(QtCore.QSize(size.width() + 16, size.height()))
            img = QtGui.QImage(size, QtGui.QImage.Format_ARGB32)
            painter = QtGui.QPainter(img)
            self.webPage.mainFrame().render(painter)
            painter.end()
            if not img.save(self.dir_path):
                print("截圖失敗")
        else:
            print("網頁加載失敗!")
        self.close()


if __name__ == "__main__":
    obj1 = WebStatus(10, 2)
    # isAccessible() returns the normalised URL on success, so it is reused
    # as the address to capture.
    status = obj1.isAccessible("https://www.jd.com")
    if status:
        app = QtGui.QApplication(sys.argv)
        shotter = PageShotter(status,pic_path = './dirpath_test')
        obj = shotter.shot()
        print(obj)
        sys.exit(app.exec_())
    else:
        print('Invalid URL')
安裝請參考 http://www.cnblogs.com/luxiaojun/p/6144748.html
from selenium import webdriver
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities

# Start from a copy of the PhantomJS capabilities and set the User-Agent.
dcap = dict(DesiredCapabilities.PHANTOMJS)
dcap["phantomjs.page.settings.userAgent"] = ("Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:25.0) Gecko/20100101 Firefox/25.0 ")

# Raw string: same value as before, without relying on backslash sequences
# that merely happen not to be recognised escapes.
obj = webdriver.PhantomJS(executable_path=r'C:\Python27\Scripts\phantomjs.exe', desired_capabilities=dcap)
try:
    obj.get('http://wap.95533pc.com')  # open the page
    obj.save_screenshot("1.png")  # save the screenshot
finally:
    # Always shut the driver down, even when loading or saving fails, so the
    # PhantomJS process is not left running.
    obj.quit()
另外還有利用subprocess中的Popen方法執行Linux命令進行截圖的方法,如CutyCapt,會在以後的文章中繼續寫到,同樣也可以參考以下文章:
http://www.111cn.net/sys/linux/81361.htm