1. 爬取電影頁
2. 解析提取電影信息
3. 保存數據
驅動瀏覽器往目標網站發送請求,獲取響應數據
- 不需要分析複雜的通訊流程
- 執行 js 代碼
- 獲取動態數據
driver = webdriver.Chrome()
driver.get('網站')  # 往某個網站發送請求
driver.close()
element:查找一個
elements:查找多個
by_id
by_class_name
by_name
by_link_text
by_partial_link_text
by_css_selector
click
clear
示例:
# Demo: open JD.com, run two searches, and show the send_keys / click / clear
# element operations.
import time

from selenium import webdriver
from selenium.webdriver.common.by import By  # locator strategy, e.g. By.ID, By.CSS_SELECTOR
from selenium.webdriver.common.keys import Keys  # keyboard key constants
from selenium.webdriver.support import expected_conditions as EC  # used together with WebDriverWait below
from selenium.webdriver.support.wait import WebDriverWait  # wait for specific elements to load

# NOTE(review): newer Selenium releases expect the driver path to be supplied
# through a Service object rather than positionally - confirm against the
# installed Selenium version.
driver = webdriver.Chrome(r'C:\Program Files (x86)\chromedriver.exe')
try:
    # Element lookups retry for up to 10 seconds before failing.
    driver.implicitly_wait(10)
    driver.get("https://www.jd.com/")
    time.sleep(5)

    # First search: type into the search box and click the search button.
    # (Named input_tag rather than `input` so the builtin is not shadowed.)
    input_tag = driver.find_element(By.ID, 'key')
    input_tag.send_keys('圍城')
    search = driver.find_element(By.CLASS_NAME, 'button')
    search.click()
    time.sleep(3)

    # Second search: the page has changed, so the search box is located again,
    # cleared, refilled, and submitted with the ENTER key.
    input2 = driver.find_element(By.ID, 'key')
    input2.clear()
    time.sleep(1)
    input2.send_keys('墨菲定律')
    input2.send_keys(Keys.ENTER)
    time.sleep(10)
finally:
    # quit() ends the whole WebDriver session, not just the current window.
    driver.quit()
ActionChains 是一個動作鏈對象:
# Demo: drag the 'draggable' element onto the 'droppable' element inside an
# iframe using ActionChains.
import time

from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By  # locator strategy, e.g. By.ID, By.CSS_SELECTOR
from selenium.webdriver.common.keys import Keys  # keyboard key constants
from selenium.webdriver.support import expected_conditions as EC  # used together with WebDriverWait below
from selenium.webdriver.support.wait import WebDriverWait  # wait for specific elements to load

# NOTE(review): newer Selenium releases expect the driver path to be supplied
# through a Service object rather than positionally - confirm against the
# installed Selenium version.
driver = webdriver.Chrome(r'C:\Program Files (x86)\chromedriver.exe')
try:
    driver.implicitly_wait(10)
    driver.get('http://www.runoob.com/try/try.php?filename=jqueryui-api-droppable')
    time.sleep(5)

    # The demo widgets live inside an iframe; switch into it before locating them.
    driver.switch_to.frame('iframeResult')
    time.sleep(1)

    source = driver.find_element(By.ID, 'draggable')
    target = driver.find_element(By.ID, 'droppable')

    # Option 1 (instant move): build the action and execute it with perform():
    #   ActionChains(driver).drag_and_drop(source, target).perform()

    # Option 2 (used here): move a little at a time.
    print(source.tag_name)
    print(source.text)
    print(source.size)
    print(target.location)
    print(source.location)

    # Horizontal distance the source has to travel.
    x = target.location['x'] - source.location['x']

    # Press and hold the source element. A fresh ActionChains is created for
    # every separate action instead of sharing one instance.
    ActionChains(driver).click_and_hold(source).perform()

    s = 0
    while s < x:
        # Each small step is its own ActionChains.
        ActionChains(driver).move_by_offset(xoffset=2, yoffset=0).perform()
        s += 2
        time.sleep(0.1)

    # Drop the element.
    ActionChains(driver).release(source).perform()
    time.sleep(10)
finally:
    # quit() ends the whole WebDriver session, not just the current window.
    driver.quit()
driver.switch_to.frame(frame 的id名)
# Run a JavaScript snippet in the page currently loaded by `driver`.
# `driver` and `time` are expected to be defined by the surrounding script.
alert_js = ''' alert("你好") '''

driver.get("https://www.baidu.com/")
driver.execute_script(alert_js)

# Keep the page up for a few seconds so the alert can be seen.
time.sleep(5)
# Simulate the browser's back / forward navigation.
import time

from selenium import webdriver

browser = webdriver.Chrome()
try:
    # Visit three pages in a row to build up a navigation history.
    browser.get('https://www.baidu.com')
    browser.get('https://www.taobao.com')
    browser.get('http://www.sina.com.cn/')

    # Go back one entry in the history.
    browser.back()
    time.sleep(10)

    # Go forward again.
    browser.forward()
finally:
    # Always end the session, even if one of the page loads fails.
    browser.quit()
# Demo: search JD.com for a keyword and append every result's name, price,
# link and review summary to jd.txt.
import time  # required by time.sleep() below

from selenium import webdriver
from selenium.webdriver.common.by import By  # locator strategy constants
from selenium.webdriver.common.keys import Keys  # keyboard key constants

# NOTE(review): newer Selenium releases expect the driver path to be supplied
# through a Service object rather than positionally - confirm against the
# installed Selenium version.
driver = webdriver.Chrome(r'C:\Program Files (x86)\chromedriver.exe')
try:
    driver.implicitly_wait(10)
    driver.get('https://www.jd.com/')

    # Type the keyword into the search box on the JD home page and submit it.
    input_tag = driver.find_element(By.ID, 'key')
    input_tag.send_keys('墨菲定律')
    input_tag.send_keys(Keys.ENTER)
    time.sleep(2)

    goods = driver.find_elements(By.CLASS_NAME, 'gl-item')

    # Open the output file once for the whole result list instead of once per item.
    with open('jd.txt', 'a', encoding='utf-8') as f:
        for good in goods:
            # Product name
            name = good.find_element(By.CSS_SELECTOR, '.p-name em').text
            # Product price
            price = good.find_element(By.CLASS_NAME, 'p-price').text
            # Product link
            url = good.find_element(By.CSS_SELECTOR, '.p-name a').get_attribute('href')
            # Product review summary
            commit = good.find_element(By.CLASS_NAME, 'p-commit').text

            good_content = f''' 商品名稱:{name} 商品價格:{price} 商品連接:{url} 商品評價:{commit} \n '''
            print(good_content)
            f.write(good_content)
            print("寫入成功")
finally:
    # quit() ends the whole WebDriver session, not just the current window.
    driver.quit()
(加入了自動下拉加載商品與點擊下一頁):
import time  # required by time.sleep() in get_goods

from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By  # locator strategy constants
from selenium.webdriver.common.keys import Keys  # keyboard key constants


def get_goods(driver, max_pages=None):
    """Scrape every result page reachable from the current page into jd.txt.

    For each page the window is scrolled down, every 'gl-item' element is
    read (name, price, link, review summary), the record is printed and
    appended to jd.txt, and then the 'pn-next' link is clicked to move on.

    The driver is NOT closed here: the caller created it and is responsible
    for shutting it down.

    Args:
        driver: A Selenium WebDriver already showing a result page.
        max_pages: Optional upper bound on the number of pages to scrape;
            None (the default) means keep going until there is no next page.

    Raises:
        selenium.common.exceptions.NoSuchElementException: if a product
            element lacks one of the expected sub-elements.
    """
    js_code = ''' window.scrollTo(0,5000) '''
    pages_done = 0

    # Iterate over pages in a loop rather than recursing once per page.
    while max_pages is None or pages_done < max_pages:
        # Scroll down so that more products get loaded.
        driver.execute_script(js_code)

        goods = driver.find_elements(By.CLASS_NAME, 'gl-item')

        # Open the output file once per page rather than once per item.
        with open('jd.txt', 'a', encoding='utf-8') as f:
            # Item numbering restarts at 1 on every page.
            for num, good in enumerate(goods, start=1):
                # Product name
                name = good.find_element(By.CSS_SELECTOR, '.p-name em').text
                # Product price
                price = good.find_element(By.CLASS_NAME, 'p-price').text
                # Product link
                url = good.find_element(By.CSS_SELECTOR, '.p-name a').get_attribute('href')
                # Product review summary
                commit = good.find_element(By.CLASS_NAME, 'p-commit').text

                good_content = f''' num:{num} 商品名稱:{name} 商品價格:{price} 商品連接:{url} 商品評價:{commit} '''
                print(good_content)
                f.write(good_content)
                print("寫入成功")

        pages_done += 1

        # Locate the "next page" link; its absence means the last page was reached.
        try:
            next_tag = driver.find_element(By.CLASS_NAME, 'pn-next')
        except NoSuchElementException:
            break

        # NOTE(review): assumes the site marks an inactive next-page link with
        # a 'disabled' class - confirm against the live markup.
        if 'disabled' in (next_tag.get_attribute('class') or ''):
            break

        next_tag.click()
        time.sleep(5)


if __name__ == '__main__':
    # NOTE(review): newer Selenium releases expect the driver path to be
    # supplied through a Service object rather than positionally - confirm
    # against the installed Selenium version.
    driver = webdriver.Chrome(r'C:\Program Files (x86)\chromedriver.exe')
    try:
        driver.implicitly_wait(10)
        driver.get('https://www.jd.com/')

        # Type the keyword into the search box on the JD home page and submit it.
        input_tag = driver.find_element(By.ID, 'key')
        input_tag.send_keys('墨菲定律')
        input_tag.send_keys(Keys.ENTER)

        get_goods(driver)
    finally:
        # Single point of cleanup: quit() ends the whole WebDriver session.
        driver.quit()