# 爬取京東商品信息 (scrape JD.com product information)

# 爬取京東商品信息css (scrape JD.com product information, CSS-selector version)

 

from selenium import webdriver
from selenium.webdriver import ChromeOptions
from selenium.webdriver import ActionChains
from selenium.webdriver.common.keys import Keys
import time

# Chrome launch options. The `disable-infobars` switch is presumably meant to
# hide Chrome's "controlled by automated test software" bar -- confirm that the
# installed Chrome version still honours it.
option = ChromeOptions()
option.add_argument('disable-infobars')

# Start the browser session used by the rest of the script. `options=` is the
# supported keyword; `chrome_options=` is the deprecated spelling and triggers
# a DeprecationWarning.
driver = webdriver.Chrome(options=option)

def _scroll_page(driver):
    """Scroll the current page downwards in 20 steps.

    The first step scrolls to y=400 and every following step goes 500px
    further, with a 0.2s pause after each one. Presumably this gives lazily
    loaded product entries time to render -- confirm against the live page.
    """
    for number in range(400, 400 + 20 * 500, 500):
        js = '''
                window.scrollTo(0, %s)
            ''' % number
        driver.execute_script(js)
        time.sleep(0.2)


def _format_good(good):
    """Return the text record for a single product element.

    Reads the name, price, link, image URL and review count from the child
    elements of ``good`` via CSS selectors and renders them into the record
    template.
    """
    # Product name (line breaks removed).
    good_name = good.find_element_by_css_selector('.p-name em').text.replace('\n', '')

    # Product price (line breaks removed).
    good_price = good.find_element_by_css_selector('.p-price').text.replace('\n', '')

    # Link to the product page.
    good_link = good.find_element_by_css_selector('.p-img a').get_attribute('href')

    # Product image URL.
    good_img = good.find_element_by_css_selector('.p-img img').get_attribute('src')

    # Review count (line breaks replaced by spaces).
    good_commit = good.find_element_by_css_selector('.p-commit').text.replace('\n', ' ')

    return '''
            商品名稱: %s
            商品價格: %s
            商品連接: %s
            商品圖片: %s
            評價人數: %s
            ''' % (good_name, good_price, good_link, good_img, good_commit)


def get_goods(driver):
    """Scrape every search-result page reachable through the "next" button.

    For each page the function scrolls down, collects all ``gl-item``
    elements below the ``J_goodsList`` container, prints one record per
    product and appends it to the output text file. It then clicks the
    ``pn-next`` element, waits 3 seconds and repeats.

    Pages are walked in a loop rather than by self-recursion, so the call
    depth stays constant and the function returns normally once there is no
    usable next-page button.

    Args:
        driver: Selenium WebDriver already showing a search-result page.
    """
    while True:
        _scroll_page(driver)

        # Container holding all products, then the individual product entries.
        good_div = driver.find_element_by_id('J_goodsList')
        good_list = good_div.find_elements_by_class_name('gl-item')

        # Open the output file once per page instead of once per product.
        with open('京東女士內衣數據爬去.txt', 'a', encoding='utf-8') as f:
            for good in good_list:
                goods = _format_good(good)
                print(goods)
                f.write(goods + '\n')

        # find_elements_* returns an empty list instead of raising when
        # nothing matches, which gives a clean stop condition.
        next_tags = driver.find_elements_by_class_name('pn-next')
        if not next_tags:
            return
        next_tag = next_tags[0]

        # NOTE(review): the last page appears to keep the button but add a
        # `disabled` class to it -- confirm against the live page.
        css_classes = (next_tag.get_attribute('class') or '').split()
        if 'disabled' in css_classes:
            return

        next_tag.click()
        # Give the next result page time to load before scraping it.
        time.sleep(3)



try:
    # Open the JD home page; element lookups afterwards wait up to 10s.
    driver.get('https://www.jd.com/')
    driver.implicitly_wait(10)

    # Type the search keyword into the search box (id="key") and submit it
    # by clicking the first element with class "button".
    input_tag = driver.find_element_by_id('key')
    input_tag.send_keys('女士內衣')
    search_button = driver.find_element_by_class_name('button')
    search_button.click()

    # Scrape the result pages.
    get_goods(driver)

    # Presumably keeps the browser window open for manual inspection after
    # scraping -- confirm whether this long pause is still wanted.
    time.sleep(1000)

finally:
    # quit() ends the whole WebDriver session and stops the chromedriver
    # process; close() would only close the current window.
    driver.quit()
# 相關文章 (related articles)
# 相關標籤/搜索 (related tags / search)