04Selenium剩餘部分及練習:爬取京東商品信息

昨日回顧

1、爬取豆瓣電影top250

1.爬取電影頁

2.解析提取電影信息

3.保存數據

2、Selenium請求庫

驅動瀏覽器往目標網站發送請求,獲取響應數據

-不需要分析複雜的通訊流程

-執行js代碼

-獲取動態數據

3、Selenium使用

driver = webdriver.Chrome()

隱式等待

driver.get('網站') 往某個網站發送請求

顯式等待

driver.close()

4、選擇器

element:查找一個

elements:查找多個

by_id

by_class_name

by_name

by_link_text

by_partial_link_text

by_css_selector

今日內容

1、Selenium剩餘部分

1.元素交互操作

1.1 點擊,清除

click

clear

示例:

from selenium import webdriver
from selenium.webdriver.common.by import By #按照什麼方式查找,By.Id,By.CSS_SELECTOR
from selenium.webdriver.common.keys import Keys #鍵盤按鍵操做
from selenium.webdriver.support import expected_conditions as EC #和下面WebDriverWait一塊兒用的
from selenium.webdriver.support.wait import WebDriverWait #等待頁面加載某些元素

import time

# Demo: type into the JD search box, click the search button, then clear the
# box and search again by pressing ENTER.
# Start Chrome through the chromedriver executable at this path.
driver = webdriver.Chrome(r'C:\Program Files (x86)\chromedriver.exe')

try:
    # Implicit wait of 10 seconds for element lookups.
    driver.implicitly_wait(10)
    driver.get("https://www.jd.com/")
    time.sleep(5)

    # Click / clear demo.
    # `search_box` instead of `input`, so the builtin input() is not shadowed.
    search_box = driver.find_element_by_id('key')
    search_box.send_keys('圍城')

    search_button = driver.find_element_by_class_name('button')
    search_button.click()

    time.sleep(3)

    # Look the search box up again after the click, empty it, and submit a
    # second keyword with the ENTER key instead of the button.
    results_box = driver.find_element_by_id('key')
    results_box.clear()
    time.sleep(1)
    results_box.send_keys('墨菲定律')
    results_box.send_keys(Keys.ENTER)
    time.sleep(10)
finally:
    # quit() ends the WebDriver session and stops the chromedriver process;
    # close() would only close the current window.
    driver.quit()
1.2 Action Chains

ActionChains 是一個動作鏈對象,先擬定好動作,再調用 perform() 執行。

from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By #按照什麼方式查找,By.Id,By.CSS_SELECTOR
from selenium.webdriver.common.keys import Keys #鍵盤按鍵操做
from selenium.webdriver.support import expected_conditions as EC #和下面WebDriverWait一塊兒用的
from selenium.webdriver.support.wait import WebDriverWait #等待頁面加載某些元素

import time

# Demo: drag the "draggable" element onto the "droppable" element with
# ActionChains, moving a little at a time.
driver = webdriver.Chrome(r'C:\Program Files (x86)\chromedriver.exe')

try:
    driver.implicitly_wait(10)
    driver.get('http://www.runoob.com/try/try.php?filename=jqueryui-api-droppable')
    time.sleep(5)

    # The two elements are looked up after switching into this frame.
    driver.switch_to.frame('iframeResult')
    time.sleep(1)

    source = driver.find_element_by_id('draggable')
    target = driver.find_element_by_id('droppable')

    # Option 1 (instant move): build the action, then call .perform() to run it.
    #   ActionChains(driver).drag_and_drop(source, target).perform()

    # Option 2 (used here): move step by step.
    # Print some element properties for inspection.
    print(source.tag_name)
    print(source.text)
    print(source.size)

    print(target.location)
    print(source.location)

    # Horizontal distance to cover.
    x = target.location['x'] - source.location['x']

    # Press and hold the source element. Each separate action uses its own
    # ActionChains object; different actions must not share one.
    ActionChains(driver).click_and_hold(source).perform()

    s = 0
    while s < x:
        # A new ActionChains is created for every 2-pixel step.
        ActionChains(driver).move_by_offset(xoffset=2, yoffset=0).perform()
        s += 2

        time.sleep(0.1)

    # Drop the element.
    ActionChains(driver).release(source).perform()
    time.sleep(10)

finally:
    # quit() ends the WebDriver session and stops the chromedriver process;
    # close() would only close the current window.
    driver.quit()
1.3 frame切換
driver.switch_to.frame(frame 的id名)
1.4 執行js代碼
# Fragment: assumes `driver` and `time` already exist, as in the earlier examples.
# Load the page, then run a JavaScript snippet in it that pops up an alert.
driver.get("https://www.baidu.com/")

alert_js = '''
            alert("你好")

    '''
driver.execute_script(alert_js)

# Pause for 5 seconds after running the script.
time.sleep(5)
1.5 其餘
#模擬瀏覽器的前進後退
import time
from selenium import webdriver

# Demo: visit three pages in a row, then step back and forward through the
# browser history.
browser = webdriver.Chrome()
try:
    browser.get('https://www.baidu.com')
    browser.get('https://www.taobao.com')
    browser.get('http://www.sina.com.cn/')

    # Go back one page.
    browser.back()
    time.sleep(10)
    # Go forward again.
    browser.forward()
finally:
    # Release the browser even if a navigation step raises; quit() also stops
    # the driver process, which close() does not.
    browser.quit()

2、練習:爬取京東商品信息

簡單版本

import time  # needed for time.sleep below; the original snippet never imported it

from selenium import webdriver
from selenium.webdriver.common.keys import Keys  # keyboard key constants

# Simple version: search JD for one keyword and save the first results page
# to jd.txt.
driver = webdriver.Chrome(r'C:\Program Files (x86)\chromedriver.exe')
try:
    driver.implicitly_wait(10)

    driver.get('https://www.jd.com/')

    # Type the keyword into the search box on the JD home page and submit it.
    input_tag = driver.find_element_by_id('key')
    input_tag.send_keys('墨菲定律')
    input_tag.send_keys(Keys.ENTER)

    time.sleep(2)

    goods = driver.find_elements_by_class_name('gl-item')

    # Open the output file once for the whole page instead of once per item.
    with open('jd.txt', 'a', encoding='utf-8') as f:
        for good in goods:
            # Product name
            name = good.find_element_by_css_selector('.p-name em').text

            # Product price
            price = good.find_element_by_class_name('p-price').text

            # Product link
            url = good.find_element_by_css_selector('.p-name a').get_attribute('href')

            # Product review text
            commit = good.find_element_by_class_name('p-commit').text

            good_content = f'''
        商品名稱:{name}
        商品價格:{price}
        商品連接:{url}
        商品評價:{commit}
        \n
        '''
            print(good_content)
            f.write(good_content)

    print("寫入成功")

finally:
    # quit() ends the WebDriver session and stops the chromedriver process;
    # close() would only close the current window.
    driver.quit()

改良版本

(加入了自動下拉加載商品與點擊下一頁):

from selenium import webdriver
from selenium.webdriver.common.keys import Keys #鍵盤按鍵操做



def get_goods(driver, page_delay=5, output_path='jd.txt', max_pages=None):
    """Scrape the products on the current results page and on the following pages.

    For each page: scroll down so more products load, read name, price, link
    and review text from every ``gl-item`` element, print each record and
    append it to ``output_path``, then click the ``pn-next`` element to move on.

    Differences from the earlier recursive version:

    * The driver is NOT closed here. The caller created it and closes it;
      closing it in every recursive frame closed an already-closed driver.
    * Pages are walked with a loop, so the page count is not limited by the
      recursion depth.
    * ``num`` keeps counting across pages instead of restarting at 1.

    Args:
        driver: WebDriver already showing a search results page.
        page_delay: Seconds to sleep after clicking "next page".
        output_path: File the records are appended to (UTF-8).
        max_pages: Stop after this many pages; ``None`` means keep going
            until no usable "next page" element is found.
    """
    import time  # local import: this snippet's import header does not include it

    js_code = '''
    window.scrollTo(0,5000)
    '''
    num = 1
    pages_done = 0

    with open(output_path, 'a', encoding='utf-8') as f:
        while True:
            # Scroll down so the rest of the product list gets loaded.
            driver.execute_script(js_code)

            for good in driver.find_elements_by_class_name('gl-item'):
                # Product name
                name = good.find_element_by_css_selector('.p-name em').text

                # Product price
                price = good.find_element_by_class_name('p-price').text

                # Product link
                url = good.find_element_by_css_selector('.p-name a').get_attribute('href')

                # Product review text
                commit = good.find_element_by_class_name('p-commit').text

                good_content = f'''
                    num:{num}
                    商品名稱:{name}
                    商品價格:{price}
                    商品連接:{url}
                    商品評價:{commit}
                    '''
                print(good_content)
                f.write(good_content)
                num += 1
            print("寫入成功")

            pages_done += 1
            if max_pages is not None and pages_done >= max_pages:
                break

            # The plural lookup returns an empty list when nothing matches, so
            # the last page ends the loop instead of raising.
            next_tags = driver.find_elements_by_class_name('pn-next')
            if not next_tags:
                break
            next_tag = next_tags[0]

            # NOTE(review): assumes the site marks an inactive "next" button
            # with a "disabled" class on the last page; confirm against the page.
            css_class = next_tag.get_attribute('class') or ''
            if 'disabled' in css_class.split():
                break

            next_tag.click()
            time.sleep(page_delay)


if __name__ == '__main__':
    # Improved version entry point: search JD for a keyword, then hand the
    # results page to get_goods.
    driver = webdriver.Chrome(r'C:\Program Files (x86)\chromedriver.exe')
    try:
        driver.implicitly_wait(10)

        driver.get('https://www.jd.com/')

        # Type the keyword into the search box on the JD home page and submit it.
        input_tag = driver.find_element_by_id('key')
        input_tag.send_keys('墨菲定律')
        input_tag.send_keys(Keys.ENTER)

        get_goods(driver)
    finally:
        # quit() ends the WebDriver session and stops the chromedriver
        # process; close() would only close the current window.
        driver.quit()
相關文章
相關標籤/搜索