python爬蟲:爬取京東商品信息

'''
初級版
'''

import time
from selenium import webdriver
from selenium.webdriver.common.keys import Keys

# Beginner version: search JD.com for one keyword and append every product
# found on the first results page to jd.txt.
driver = webdriver.Chrome(r'C:\Users\Auraro\Desktop/chromedriver.exe')
num = 1  # 1-based sequence number written with each product record
try:
    driver.implicitly_wait(10)
    driver.get('https://www.jd.com/')

    # Type the keyword into the search box and submit it with ENTER.
    input_tag = driver.find_element_by_id('key')
    input_tag.send_keys('墨菲定律')
    input_tag.send_keys(Keys.ENTER)

    # Fixed pause so the results page has time to render.
    time.sleep(5)

    # Record layout; the literal is kept exactly as before (including its
    # leading whitespace) so the file format does not change.
    content_template = '''
        num={}
        商品名稱:{}
        商品連接:{}
        商品價格:{}
        商品的評價條數:{}
        \n
        '''

    good_list = driver.find_elements_by_class_name('gl-item')
    # Open the output file once for the whole page instead of once per item.
    with open('jd.txt', 'a', encoding='utf-8') as f:
        for good in good_list:
            # Product name
            good_name = good.find_element_by_css_selector('.p-name em').text
            print(good_name)

            # Product link
            good_url = good.find_element_by_css_selector('.p-name a').get_attribute('href')
            print(good_url)

            # Product price
            good_price = good.find_element_by_class_name('p-price').text
            print(good_price)

            # Product review summary
            good_commit = good.find_element_by_class_name('p-commit').text

            good_content = content_template.format(
                num, good_name, good_url, good_price, good_commit)
            print(good_content)
            f.write(good_content)
            # Advance the counter; it used to stay at 1 for every record.
            num += 1

    print('商品信息寫入成功!')
finally:
    # quit() ends the browser session and stops the driver process;
    # close() would only close the current window.
    driver.quit()

'''
終極版
'''

import time
from selenium import webdriver
from selenium.webdriver.common.keys import Keys

# "Ultimate" version: same search as the beginner script, but scrolls the
# results page before scraping and then moves on to the next page.
driver = webdriver.Chrome(r'C:\Users\Auraro\Desktop/chromedriver.exe')
num = 1  # 1-based sequence number written with each product record
try:
    driver.implicitly_wait(10)
    driver.get('https://www.jd.com/')

    # Type the keyword into the search box and submit it with ENTER.
    input_tag = driver.find_element_by_id('key')
    input_tag.send_keys('墨菲定律')
    input_tag.send_keys(Keys.ENTER)

    time.sleep(5)

    # Scroll down to y=5000px.  The global object is `window`; the previous
    # `windows` is undefined in the browser and made execute_script raise.
    js_code = '''
    window.scrollTo(0,5000)
        '''
    driver.execute_script(js_code)

    # Wait 5s for the product data to load after scrolling.
    time.sleep(5)

    # Record layout; the literal is kept exactly as before (including its
    # leading whitespace) so the file format does not change.
    content_template = '''
        num={}
        商品名稱:{}
        商品連接:{}
        商品價格:{}
        商品的評價條數:{}
        \n
        '''

    good_list = driver.find_elements_by_class_name('gl-item')
    # Open the output file once for the whole page instead of once per item.
    with open('jd.txt', 'a', encoding='utf-8') as f:
        for good in good_list:
            # Product name
            good_name = good.find_element_by_css_selector('.p-name em').text
            print(good_name)

            # Product link
            good_url = good.find_element_by_css_selector('.p-name a').get_attribute('href')
            print(good_url)

            # Product price
            good_price = good.find_element_by_class_name('p-price').text
            print(good_price)

            # Product review summary
            good_commit = good.find_element_by_class_name('p-commit').text

            good_content = content_template.format(
                num, good_name, good_url, good_price, good_commit)
            print(good_content)
            f.write(good_content)
            num += 1

    print('商品信息寫入成功!')

    # Go to the next results page.  click must be *called*; the bare
    # attribute reference `next_tag.click` did nothing.
    next_tag = driver.find_element_by_class_name('pn-next')
    next_tag.click()

    time.sleep(10)

finally:
    # quit() ends the browser session and stops the driver process;
    # close() would only close the current window.
    driver.quit()

'''
狂暴版
'''
from selenium import webdriver
from selenium.webdriver.common.keys import Keys  # 鍵盤按鍵操做
import time

#
def get_good(driver):
    """Scrape the current results page and every following page to a file.

    For each page this scrolls down so that more items render, reads the
    name, link, price and review text of every ``gl-item`` element, appends
    one record per product to '京東商品信息爬取.txt', and then clicks the
    ``pn-next`` control.  It returns once no usable next-page control is
    found.

    The driver is not closed here: the caller that created it is responsible
    for shutting it down.  Pages are walked with a loop rather than by
    recursion, and product numbers keep counting across pages.

    Args:
        driver: a Selenium WebDriver that is already showing a search
            results page.
    """
    # Imported locally so this function does not depend on extra
    # module-level imports.
    from selenium.common.exceptions import NoSuchElementException

    # Record layout; the literal is kept exactly as before (including its
    # leading whitespace) so the file format does not change.
    content_template = '''
                       num:{}
                       商品名稱:{}
                       商品連接:{}
                       商品價格:{}
                       商品評論:{}
                       \n
                       '''

    num = 1  # running product number across all pages
    while True:
        time.sleep(5)

        # Scroll down to y=5000px.
        js_code = '''
                   window.scrollTo(0,5000)
                   '''
        driver.execute_script(js_code)

        time.sleep(5)  # wait 5s for the product information to load
        good_list = driver.find_elements_by_class_name('gl-item')

        # Open the output file once per page instead of once per product.
        with open('京東商品信息爬取.txt', 'a', encoding='utf-8') as f:
            for good in good_list:
                # Product name
                good_name = good.find_element_by_css_selector('.p-name em').text
                # Product link
                good_url = good.find_element_by_css_selector('.p-name a').get_attribute('href')
                # Product price
                good_price = good.find_element_by_class_name('p-price').text
                # Product review summary
                good_commit = good.find_element_by_class_name('p-commit').text

                good_content = content_template.format(
                    num, good_name, good_url, good_price, good_commit)
                print(good_content)
                # Persist the record.
                f.write(good_content)
                num += 1

        # Locate the "next page" control; no such element means we are done.
        try:
            next_tag = driver.find_element_by_class_name('pn-next')
        except NoSuchElementException:
            break
        # NOTE(review): presumably the site marks the last page's button with
        # an extra "disabled" class - verify against the live page.
        if 'disabled' in (next_tag.get_attribute('class') or ''):
            break
        next_tag.click()

        # Same pause after the click as before, ahead of the next page's
        # own initial wait.
        time.sleep(5)

if __name__ == '__main__':
    # Script entry point: open JD.com, search for one keyword and hand the
    # results page to get_good() for scraping.
    driver = webdriver.Chrome(r'C:\Users\Auraro\Desktop/chromedriver.exe')
    try:
        driver.implicitly_wait(10)
        driver.get('https://www.jd.com/')

        # Named search_box rather than `input` so the builtin input() is not
        # shadowed at module scope.
        search_box = driver.find_element_by_id('key')
        search_box.send_keys('人間失格')
        search_box.send_keys(Keys.ENTER)
        get_good(driver)
        print('商品信息寫入完成')
    finally:
        # quit() ends the browser session and stops the driver process;
        # close() would only close the current window.
        driver.quit()
相關文章
相關標籤/搜索