從一個啓動瀏覽器並打開百度網頁的代碼開始
from selenium import webdriver

# The package exports the Chrome driver class as `Chrome` (capitalised);
# `webdriver.chrome` is the sub-package, which is not callable.
driver = webdriver.Chrome()
driver.get('https://www.baidu.com')
from selenium import webdriver
這代碼表示從selenium導入webdriver。進入selenium, 發現webdriver是一個包,那麼導入的實際上是webdriver包下的`__init__.py`文件
from .firefox.webdriver import WebDriver as Firefox # noqa from .firefox.firefox_profile import FirefoxProfile # noqa from .firefox.options import Options as FirefoxOptions # noqa
#實例化的是.chrome.webdriver裏的webDriver from .chrome.webdriver import WebDriver as Chrome # noqa from .chrome.options import Options as ChromeOptions # noqa from .ie.webdriver import WebDriver as Ie # noqa from .ie.options import Options as IeOptions # noqa from .edge.webdriver import WebDriver as Edge # noqa from .opera.webdriver import WebDriver as Opera # noqa from .safari.webdriver import WebDriver as Safari # noqa from .blackberry.webdriver import WebDriver as BlackBerry #noqa from .phantomjs.webdriver import WebDriver as PhantomJS # noqa from .android.webdriver import WebDriver as Android # noqa from .webkitgtk.webdriver import WebDriver as WebKitGTK # noqa from .webkitgtk.options import Options as WebKitGTKOptions from .remote.webdriver import WebDriver as Remote # noqa from .common.desired_capabilities import DesiredCapabilities from .common.action_chains import ActionChains # noqa from .common.touch_actions import TouchActions # noqa from .common.proxy import Proxy # noqa
打開chrome.webdriver文件,下面只展現出相關代碼
#selenium/webdriver/chrome/webdriver.py import warnings from selenium.webdriver.remote.webdriver import WebDriver as RemoteWebDriver from .remote_connection import ChromeRemoteConnection from .service import Service from .options import Options class WebDriver(RemoteWebDriver): def __init__(self, executable_path="chromedriver", port=0, options=None, service_args=None, desired_capabilities=None, service_log_path=None, chrome_options=None): """ 參數: - executable_path - chromedriver的執行路徑 默認在環境變裏中查找 - port -http鏈接的端口號 - desired_capabilities: 通常瀏覽器的字典對象 - options: ChromeOptions的實例 """ #………………………………省略………………………………………… #第1步 實例化一個Service對象 self.service = Service( executable_path, port=port, service_args=service_args, log_path=service_log_path) #第2步 調用了service的start方法 self.service.start() #………………………………省略…………………………………………
WebDriver構造方法中最早實例化Service類,咱們實例化Chrome()時並無傳入參數,因此Service的參數 executable_path="chromedriver"、port=0,其他都是None
打開Chrome目錄Service文件, 只有如下代碼
# selenium/webdriver/chrome/service.py
from selenium.webdriver.common import service


class Service(service.Service):
    """Service object that manages starting and stopping the ChromeDriver."""

    def __init__(self, executable_path, port=0, service_args=None,
                 log_path=None, env=None):
        """
        Build the chromedriver argument list and initialise the base service.

        Args:
         - service_args - list of extra arguments for chromedriver
         - log_path - path of the chromedriver log file
        """
        extra_args = service_args or []
        self.service_args = extra_args
        if log_path:
            extra_args.append('--log-path=%s' % log_path)

        # Delegate the remaining set-up to the parent class constructor.
        service.Service.__init__(
            self,
            executable_path,
            port=port,
            env=env,
            start_error_message="Please see https://sites.google.com/a/chromium.org/chromedriver/home")

    def command_line_args(self):
        """Override of the parent hook: return the command-line arguments."""
        port_flag = "--port=%d" % self.port
        return [port_flag] + self.service_args
該類繼承了selenium.webdriver.common目錄下 service 類,並重寫了父類的command_line_args方法。構造方法中調用了父類的構造方法。
# selenium/webdriver/common/service.py
import errno
import os
import platform
import subprocess
from subprocess import PIPE
import time

from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common import utils

try:
    from subprocess import DEVNULL
    _HAS_NATIVE_DEVNULL = True
except ImportError:
    # subprocess has no DEVNULL here: use a sentinel value and open
    # os.devnull by hand in Service.__init__.
    DEVNULL = -3
    _HAS_NATIVE_DEVNULL = False


class Service(object):
    """Base class that launches a driver executable and waits for it.

    Subclasses must override ``command_line_args``.
    """

    def __init__(self, executable, port=0, log_file=DEVNULL, env=None,
                 start_error_message=""):
        """
        Args:
         - executable - path (or name) of the driver executable
         - port - port for the driver to listen on; 0 picks a free port
         - log_file - where the child's stdout/stderr are sent
         - env - environment for the child; defaults to os.environ
         - start_error_message - hint appended to start-up error messages
        """
        self.path = executable

        self.port = port
        # By default (port 0) pick a free port automatically.
        if self.port == 0:
            self.port = utils.free_port()

        if not _HAS_NATIVE_DEVNULL and log_file == DEVNULL:
            log_file = open(os.devnull, 'wb')

        self.start_error_message = start_error_message
        self.log_file = log_file
        # Fall back to the current process environment.
        self.env = env or os.environ

    @property
    def service_url(self):
        """Gets the url of the Service."""
        return "http://%s" % utils.join_host_port('localhost', self.port)

    def command_line_args(self):
        """Return the argument list for the executable (subclass hook).

        :Exceptions:
         - NotImplementedError : always, in this base class
        """
        # `NotImplemented` is a non-callable constant; calling it raised an
        # unrelated TypeError. NotImplementedError is the correct exception.
        raise NotImplementedError("This method needs to be implemented in a sub class")

    def start(self):
        """
        Starts the Service.

        :Exceptions:
         - WebDriverException : Raised either when it can't start the service
           or when it can't connect to the service
        """
        try:
            # Launch the executable with the subclass-provided arguments
            # (e.g. --port=<port>); its output goes to self.log_file.
            cmd = [self.path]
            cmd.extend(self.command_line_args())
            self.process = subprocess.Popen(cmd, env=self.env,
                                            close_fds=platform.system() != 'Windows',
                                            stdout=self.log_file,
                                            stderr=self.log_file,
                                            stdin=PIPE)
        except (TypeError, NotImplementedError):
            # Programming errors: propagate unchanged instead of wrapping
            # them in the "needs to be in PATH" message below.
            raise
        except OSError as err:
            if err.errno == errno.ENOENT:
                raise WebDriverException(
                    "'%s' executable needs to be in PATH. %s" % (
                        os.path.basename(self.path), self.start_error_message)
                )
            elif err.errno == errno.EACCES:
                raise WebDriverException(
                    "'%s' executable may have wrong permissions. %s" % (
                        os.path.basename(self.path), self.start_error_message)
                )
            else:
                raise
        except Exception as e:
            raise WebDriverException(
                "The executable %s needs to be available in the path. %s\n%s" %
                (os.path.basename(self.path), self.start_error_message, str(e)))

        # Poll once per second: fail if the child process has exited, stop
        # once the port is connectable, and give up after 30 attempts.
        count = 0
        while True:
            self.assert_process_still_running()
            if self.is_connectable():
                break
            count += 1
            time.sleep(1)
            if count == 30:
                raise WebDriverException("Can not connect to the Service %s" % self.path)

    def assert_process_still_running(self):
        """Raise WebDriverException if the child process has already exited."""
        return_code = self.process.poll()
        if return_code is not None:
            raise WebDriverException(
                'Service %s unexpectedly exited. Status code was: %s'
                % (self.path, return_code)
            )

    def is_connectable(self):
        """Return the result of utils.is_connectable for the service port.

        This is a single check; the 30-attempt wait happens in start().
        """
        return utils.is_connectable(self.port)
由上代碼可知Service的實例化 獲取一個端口。
而後調用了service對象的start方法。該方法用subprocess啓動chromedriver程序 並檢測是否正在鏈接。
如今再來看最開始Chrome 的WebDriver類, 此類繼承了selenium.webdriver.remote下的webdriver並調用了父類的構造方法。
#selenium/webdriver/remote/webdriver.py import warnings from selenium.webdriver.remote.webdriver import WebDriver as RemoteWebDriver from .remote_connection import ChromeRemoteConnection from .service import Service from .options import Options class WebDriver(RemoteWebDriver): """ Controls the ChromeDriver and allows you to drive the browser. You will need to download the ChromeDriver executable from http://chromedriver.storage.googleapis.com/index.html """ def __init__(self, executable_path="chromedriver", port=0, options=None, service_args=None, desired_capabilities=None, service_log_path=None, chrome_options=None): #………………………………省略………………………………………… if options is None: # desired_capabilities stays as passed in if desired_capabilities is None: #第1步 建立一個瀏覽器的字典對象 desired_capabilities = self.create_options().to_capabilities() else: if desired_capabilities is None: desired_capabilities = options.to_capabilities() else: desired_capabilities.update(options.to_capabilities()) #………………………………省略………………………………………… #第二步調用 復類的構造方法 try: RemoteWebDriver.__init__( self, command_executor=ChromeRemoteConnection( remote_server_addr=self.service.service_url), desired_capabilities=desired_capabilities) except Exception: self.quit() raise self._is_remote = False def create_options(self): return Options()
首先建立一個瀏覽器的字典對象,而後調用了to_capabilities()方法。
Options的to_capabilities()方法是返回一個caps字典對象
chrome瀏覽器返回的caps字典對象爲:
{
'browserName': 'chrome',
'version': '',
'platform': 'ANY',
'goog:chromeOptions': {'extensions': [], 'args': []}
}
接下來看看 RemoteWebDriver的構造方法
# Excerpt: the parent-constructor call made from the Chrome WebDriver's
# __init__, passing a ChromeRemoteConnection and the capabilities dict.
RemoteWebDriver.__init__(
    self,
    command_executor=ChromeRemoteConnection(
        remote_server_addr=self.service.service_url),
    desired_capabilities=desired_capabilities)
傳入了2個參數 一個是 ChromeRemoteConnection類的實例對象, 一個是前面獲取到的瀏覽器字典對象。
來看看ChromeRemoteConnection類。繼承了RemoteConnection,調用了父類的構造方法並往self._commands添加了幾個command鍵值對
# selenium/webdriver/chrome/remote_connection.py
from selenium.webdriver.remote.remote_connection import RemoteConnection


class ChromeRemoteConnection(RemoteConnection):
    """RemoteConnection with the Chromium-specific commands registered."""

    def __init__(self, remote_server_addr, keep_alive=True):
        """Initialise the base connection, then add the Chromium endpoints."""
        RemoteConnection.__init__(self, remote_server_addr, keep_alive)
        chromium_commands = {
            "launchApp": ('POST', '/session/$sessionId/chromium/launch_app'),
            "setNetworkConditions": ('POST', '/session/$sessionId/chromium/network_conditions'),
            "getNetworkConditions": ('GET', '/session/$sessionId/chromium/network_conditions'),
        }
        self._commands.update(chromium_commands)
# selenium/webdriver/remote/remote_connection.py
class RemoteConnection(object):
    """A connection with the Remote WebDriver server.

    Communicates with the server using the WebDriver wire protocol:
    https://github.com/SeleniumHQ/selenium/wiki/JsonWireProtocol"""

    def __init__(self, remote_server_addr, keep_alive=False, resolve_ip=True):
        """Resolve the server address and build the command table.

        :Args:
         - remote_server_addr - URL of the remote server.
         - keep_alive - when true, a persistent connection object is created.
         - resolve_ip - when true, the hostname is replaced by a connectable IP.
        """
        # Attempt to resolve the hostname and get an IP address.
        self.keep_alive = keep_alive
        parsed_url = parse.urlparse(remote_server_addr)
        addr = parsed_url.hostname
        if parsed_url.hostname and resolve_ip:
            port = parsed_url.port or None
            if parsed_url.scheme == "https":
                # For https the hostname is kept as-is.
                ip = parsed_url.hostname
            elif port and not common_utils.is_connectable(port, parsed_url.hostname):
                ip = None
                LOGGER.info('Could not connect to port {} on host '
                            '{}'.format(port, parsed_url.hostname))
            else:
                ip = common_utils.find_connectable_ip(parsed_url.hostname,
                                                      port=port)
            if ip:
                # Rebuild the URL with the resolved address, restoring the
                # port and any user:password credentials.
                netloc = ip
                addr = netloc
                if parsed_url.port:
                    netloc = common_utils.join_host_port(netloc,
                                                         parsed_url.port)
                if parsed_url.username:
                    auth = parsed_url.username
                    if parsed_url.password:
                        auth += ':%s' % parsed_url.password
                    netloc = '%s@%s' % (auth, netloc)
                remote_server_addr = parse.urlunparse(
                    (parsed_url.scheme, netloc, parsed_url.path,
                     parsed_url.params, parsed_url.query, parsed_url.fragment))
            else:
                LOGGER.info('Could not get IP address for host: %s' %
                            parsed_url.hostname)

        self._url = remote_server_addr
        if keep_alive:
            # NOTE(review): self._timeout is not assigned in this excerpt;
            # presumably it is defined elsewhere on the class. Confirm.
            self._conn = httplib.HTTPConnection(
                str(addr), str(parsed_url.port), timeout=self._timeout)

        # Command name -> (HTTP method, URL path template).
        self._commands = {
            Command.STATUS: ('GET', '/status'),
            Command.NEW_SESSION: ('POST', '/session'),
            Command.GET_ALL_SESSIONS: ('GET', '/sessions'),
            Command.QUIT: ('DELETE', '/session/$sessionId'),
            Command.GET_CURRENT_WINDOW_HANDLE: ('GET', '/session/$sessionId/window_handle'),
            Command.W3C_GET_CURRENT_WINDOW_HANDLE: ('GET', '/session/$sessionId/window'),
            Command.GET_WINDOW_HANDLES: ('GET', '/session/$sessionId/window_handles'),
            # ................ (omitted) .................
        }

    # The method that finally sends a command to the remote server.
    def execute(self, command, params):
        """Look up `command`, fill in its URL template and send the request.

        Returns whatever self._request returns.
        """
        # An unknown command raises KeyError at this lookup, before the
        # assert below is reached.
        command_info = self._commands[command]
        assert command_info is not None, 'Unrecognised command %s' % command
        # Substitute placeholders such as $sessionId from params.
        path = string.Template(command_info[1]).substitute(params)
        if hasattr(self, 'w3c') and self.w3c and isinstance(params, dict) and 'sessionId' in params:
            # In W3C mode the session id is sent only in the URL path.
            del params['sessionId']
        data = utils.dump_json(params)
        url = '%s%s' % (self._url, path)
        return self._request(command_info[0], url, body=data)

    # Returns a dictionary holding the parsed JSON response.
    def _request(self, method, url, body=None):
        """
        Send an HTTP request to the remote server.

        :Args:
         - method - A string for the HTTP method to send the request with.
         - url - A string for the URL to send the request to.
         - body - A string for request body. Ignored unless method is POST or PUT.

        :Returns:
          A dictionary with the server's parsed JSON response.
        """
        LOGGER.debug('%s %s %s' % (method, url, body))

        parsed_url = parse.urlparse(url)
        headers = self.get_remote_connection_headers(parsed_url, self.keep_alive)
        resp = None
        if body and method != 'POST' and method != 'PUT':
            body = None

        if self.keep_alive:
            # NOTE(review): __init__ above builds self._conn as
            # httplib.HTTPConnection. If that is the stdlib class, request()
            # returns None and `resp.status` would fail. The excerpt may mix
            # releases; confirm against the upstream version.
            resp = self._conn.request(method, url, body=body, headers=headers)

            statuscode = resp.status
        else:
            http = urllib3.PoolManager(timeout=self._timeout)
            resp = http.request(method, url, body=body, headers=headers)

            statuscode = resp.status
            # Give the response a getheader() accessor if it lacks one.
            if not hasattr(resp, 'getheader'):
                if hasattr(resp.headers, 'getheader'):
                    resp.getheader = lambda x: resp.headers.getheader(x)
                elif hasattr(resp.headers, 'get'):
                    resp.getheader = lambda x: resp.headers.get(x)

        data = resp.data.decode('UTF-8')
        try:
            if 300 <= statuscode < 304:
                # Follow the redirect with a GET to the Location header.
                return self._request('GET', resp.getheader('location'))
            if 399 < statuscode <= 500:
                return {'status': statuscode, 'value': data}
            content_type = []
            if resp.getheader('Content-Type') is not None:
                content_type = resp.getheader('Content-Type').split(';')
            if not any([x.startswith('image/png') for x in content_type]):

                try:
                    data = utils.load_json(data.strip())
                except ValueError:
                    # Body is not JSON: report success or failure from the
                    # status code and return the raw text.
                    if 199 < statuscode < 300:
                        status = ErrorCode.SUCCESS
                    else:
                        status = ErrorCode.UNKNOWN_ERROR
                    return {'status': status, 'value': data.strip()}

                # Some of the drivers incorrectly return a response
                # with no 'value' field when they should return null.
                if 'value' not in data:
                    data['value'] = None
                return data
            else:
                # image/png payload: wrap the decoded body as the value.
                data = {'status': 0, 'value': data}
                return data
        finally:
            LOGGER.debug("Finished Request")
            resp.close()
構造方法中主要是把localhost域名換成127.0.0.1,經過urllib.parse.urlparse把要處理的url解析成6大部分。
urlparse返回的是一個命名元組(named tuple)對象,包含scheme、netloc、path、params、query、fragment六個字段。netloc包括hostname和port。
調用 common_utils.find_connectable_ip()方法獲取hostname對應的ip地址,最後urllib.parse.urlunparse()從新組成url並賦值給self._url
初始化了self._commands 字典,key爲命令名,value爲具體執行的命令對應的(HTTP方法, URL路徑)元組。
RemoteConnection類的實例方法execute調用 _request方法最終實現發送命令到遠程服務器。
他們是經過wire protocol有線協議通訊的,這種協議是點對點方式進行通訊的。首先前端將這個點擊轉換成json格式的字符串,而後經過wire protocol協議傳遞給服務器
RemoteWebDriver類的構造方法 更新capabilities字典 主要調用start_session傳入capabilities字典
start_session方法 根據capabilities字典建立一個新的會話並獲取session_id。
另外還實例化了錯誤處理handle,文件查找file_detector(默認實例化是LocalFileDetector)。一個頁面切換的SwitchTo對象。
# selenium/webdriver/remote/webdriver.py
class WebDriver(object):
    """Drives a browser by sending commands to a remote server."""

    _web_element_cls = WebElement

    def __init__(self, command_executor='http://127.0.0.1:4444/wd/hub',
                 desired_capabilities=None, browser_profile=None, proxy=None,
                 keep_alive=False, file_detector=None, options=None):
        """
        Create a driver that issues commands using the wire protocol.

        :Args:
         - command_executor - URL of the remote server
           ('http://127.0.0.1:<port>'), or an already-built connection
           object, which is used as-is.
         - desired_capabilities - A dictionary of capabilities to request
           when starting the browser session. Required.
         - browser_profile - forwarded to start_session (Firefox only).
         - proxy - A selenium.webdriver.common.proxy.Proxy object. Optional.
         - keep_alive - passed to RemoteConnection when command_executor is
           given as a string.
         - file_detector - custom file detector object; a
           LocalFileDetector() instance is used by default.
         - options - an instance of an options.Options class.
        """
        capabilities = {}
        if options is not None:
            capabilities = options.to_capabilities()
        if desired_capabilities is not None:
            if not isinstance(desired_capabilities, dict):
                raise WebDriverException("Desired Capabilities must be a dictionary")
            else:
                # Explicit capabilities override those derived from options.
                capabilities.update(desired_capabilities)
        if proxy is not None:
            warnings.warn("Please use FirefoxOptions to set proxy",
                          DeprecationWarning)
            proxy.add_to_capabilities(capabilities)
        self.command_executor = command_executor
        if type(self.command_executor) is bytes or isinstance(self.command_executor, str):
            # A URL was given: wrap it in a RemoteConnection.
            self.command_executor = RemoteConnection(command_executor, keep_alive=keep_alive)
        self._is_remote = True
        # String id that identifies the browser session.
        self.session_id = None
        self.capabilities = {}
        # errorhandler.ErrorHandler instance that checks responses.
        self.error_handler = ErrorHandler()
        self.start_client()
        if browser_profile is not None:
            warnings.warn("Please use FirefoxOptions to set browser profile",
                          DeprecationWarning)
        # Core step: start a session.
        self.start_session(capabilities, browser_profile)
        # Object used for switching between pages/frames/windows.
        self._switch_to = SwitchTo(self)
        # Mobile (app) helper.
        self._mobile = Mobile(self)
        # Default to a LocalFileDetector *instance*. Without the call
        # parentheses the class itself would be stored.
        self.file_detector = file_detector or LocalFileDetector()

    def start_session(self, capabilities, browser_profile=None):
        """
        Create a new session with the given capabilities.

        :Args:
         - capabilities - dictionary of capabilities for the new session.
         - browser_profile - a FirefoxProfile object. Only used with Firefox.

        :Exceptions:
         - InvalidArgumentException : if capabilities is not a dictionary.
        """
        if not isinstance(capabilities, dict):
            raise InvalidArgumentException("Capabilities must be a dictionary")
        if browser_profile:
            if "moz:firefoxOptions" in capabilities:
                capabilities["moz:firefoxOptions"]["profile"] = browser_profile.encoded
            else:
                capabilities.update({'firefox_profile': browser_profile.encoded})
        # Example of what _make_w3c_caps returns for a default Chrome session:
        # {
        #     "firstMatch": [{}],
        #     "alwaysMatch": {
        #         'browserName': 'chrome',
        #         'version': '',
        #         'platformName': 'any',
        #         'goog:chromeOptions': {'extensions': [], 'args': []}
        #     }
        # }
        w3c_caps = _make_w3c_caps(capabilities)
        # Send both the W3C and the legacy capability formats.
        parameters = {"capabilities": w3c_caps,
                      "desiredCapabilities": capabilities}
        # Command.NEW_SESSION: ('POST', '/session')
        response = self.execute(Command.NEW_SESSION, parameters)
        if 'sessionId' not in response:
            response = response['value']
        # Remember the session id.
        self.session_id = response['sessionId']
        self.capabilities = response.get('value')

        # if capabilities is none we are probably speaking to
        # a W3C endpoint
        if self.capabilities is None:
            self.capabilities = response.get('capabilities')

        # Double check to see if we have a W3C Compliant browser
        self.w3c = response.get('status') is None
        self.command_executor.w3c = self.w3c

    # Sends the command to the remote server through
    # self.command_executor.execute, which is how the browser is controlled.
    def execute(self, driver_command, params=None):
        """
        Send a command to be executed by the command executor.

        :Args:
         - driver_command - name of the command to execute.
         - params - dictionary of parameters sent with the command.

        :Returns:
          A dictionary loaded from the JSON response.
        """
        if self.session_id is not None:
            if not params:
                params = {'sessionId': self.session_id}
            elif 'sessionId' not in params:
                params['sessionId'] = self.session_id

        # Wrap the outgoing values.
        params = self._wrap_value(params)
        # Core step: delegate to the command executor's execute().
        response = self.command_executor.execute(driver_command, params)
        if response:
            self.error_handler.check_response(response)
            # Unwrap the returned value.
            response['value'] = self._unwrap_value(
                response.get('value', None))
            return response
        # If the server doesn't send a response, assume the command was
        # a success
        return {'success': 0, 'value': None, 'sessionId': self.session_id}


def _make_w3c_caps(caps):
    """Makes a W3C alwaysMatch capabilities object.

    Filters out capability names that are not in the W3C spec. Spec-compliant
    drivers will reject requests containing unknown capability names.

    Moves the Firefox profile, if present, from the old location to the new
    Firefox options object.

    :Args:
     - caps - A dictionary of capabilities requested by the caller.
    """
    # Deep copy so the caller's dictionary is not mutated.
    caps = copy.deepcopy(caps)
    # None unless a 'firefox_profile' capability is present (so it is None
    # for a Chrome session).
    profile = caps.get('firefox_profile')
    always_match = {}
    if caps.get('proxy') and caps['proxy'].get('proxyType'):
        caps['proxy']['proxyType'] = caps['proxy']['proxyType'].lower()
    for k, v in caps.items():
        # Legacy key with a non-empty value: store it under its W3C name
        # (the platform value is lower-cased).
        if v and k in _OSS_W3C_CONVERSION:
            always_match[_OSS_W3C_CONVERSION[k]] = v.lower() if k == 'platform' else v
        # Keys that are already W3C names, or vendor-prefixed ('xx:yy').
        if k in _W3C_CAPABILITY_NAMES or ':' in k:
            always_match[k] = v
    if profile:
        moz_opts = always_match.get('moz:firefoxOptions', {})
        # If it's already present, assume the caller did that intentionally.
        if 'profile' not in moz_opts:
            # Don't mutate the original capabilities.
            new_opts = copy.deepcopy(moz_opts)
            new_opts['profile'] = profile
            always_match['moz:firefoxOptions'] = new_opts
    return {"firstMatch": [{}], "alwaysMatch": always_match}


# Legacy capability name -> W3C capability name.
_OSS_W3C_CONVERSION = {
    'acceptSslCerts': 'acceptInsecureCerts',
    'version': 'browserVersion',
    'platform': 'platformName'
}
driver.get('https://www.baidu.com')調用的是webdriver/remote/webdriver.py下的get方法
get方法調用了webdriver.py中WebDriver的execute方法,該execute方法中的self.command_executor.execute實際調用的是remote_connection.py中RemoteConnection類的execute方法。
其實是一個HTTP request給監聽端口上的Web Service, 在咱們的HTTP request的body中,會以WebDriver Wire協議規定的JSON格式的字符串來告訴Selenium咱們但願瀏覽器打開'https://www.baidu.com'頁面
# selenium/webdriver/remote/webdriver.py
def get(self, url):
    """Load the web page at ``url`` in the current browser session."""
    # Command.GET: ('POST', '/session/$sessionId/url')
    payload = {'url': url}
    self.execute(Command.GET, payload)
總結一下:
首先是webdriver實例化Service 類調用start()方法用subprocess啓動chromedriver(帶--port參數)驅動。chromedriver啓動以後都會在綁定的端口啓動Web Service。
接着實例化RemoteConnection得到 command_executor實例化對象 傳入給RemoteWebDriver構造方法。
RemoteWebDriver構造方法 start_session()方法啓動session並得到惟一的session_id(經過這個session_id來肯定找到對方且在多線程並行的時候彼此之間不會有衝突和干擾)
接下來調用WebDriver的任何API,好比get() 都須要藉助一個CommandExecutor(remote_connection類的實例對象)調用execute()發送一個命令(這個命令在CommandExecutor實例化時候生成的一個command字典)。
#部分 self._commands = { Command.STATUS: ('GET', '/status'), Command.NEW_SESSION: ('POST', '/session'), Command.GET_ALL_SESSIONS: ('GET', '/sessions'), Command.QUIT: ('DELETE', '/session/$sessionId'), Command.GET_CURRENT_WINDOW_HANDLE: ('GET', '/session/$sessionId/window_handle'), Command.W3C_GET_CURRENT_WINDOW_HANDLE: ('GET', '/session/$sessionId/window'), Command.GET_WINDOW_HANDLES: ('GET', '/session/$sessionId/window_handles'), #.................省略..................... }
CommandExecutor中的execute()方法最後返回一個_request()方法,其實是一個HTTP request給監聽端口上的Web Service。
在HTTP request的body中,Wire JSON格式字典來告訴chromedriver接下來作什麼事。(經過以前綁定的端口)
實際的執行者是chromedriver驅動,而selenium就至關於一個代理。因此selenium並非直接操控瀏覽器而是運行webdriver, 經過webdriver間接操控瀏覽器。
在現實生活中這相似打出租車,咱們告訴司機目的地是哪?走哪條路到達?webdriver就至關於出租車司機。