# 爬取京東商品信息（CSS 選擇器）
from selenium import webdriver
from selenium.webdriver import ChromeOptions
from selenium.webdriver import ActionChains
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.common.exceptions import NoSuchElementException
import time

option = ChromeOptions()
option.add_argument('disable-infobars')
# `options=` replaces the deprecated `chrome_options=` keyword.
driver = webdriver.Chrome(options=option)


def _scroll_page(driver):
    """Scroll the window down in 20 steps of 500px, starting at offset 400.

    NOTE(review): presumably done so that items which only render once they
    are scrolled into view are present before scraping -- confirm.
    """
    for offset in range(400, 400 + 20 * 500, 500):
        js = ''' window.scrollTo(0, %s) ''' % offset
        driver.execute_script(js)
        time.sleep(0.2)


def _format_good(good):
    """Return the text record (name, price, link, image, reviews) for one item element."""
    # Product name
    good_name = good.find_element(By.CSS_SELECTOR, '.p-name em').text.replace('\n', '')
    # Product price
    good_price = good.find_element(By.CSS_SELECTOR, '.p-price').text.replace('\n', '')
    # Product link
    good_link = good.find_element(By.CSS_SELECTOR, '.p-img a').get_attribute('href')
    # Product image
    good_img = good.find_element(By.CSS_SELECTOR, '.p-img img').get_attribute('src')
    # Number of reviews (newlines become spaces here, unlike the fields above)
    good_commit = good.find_element(By.CSS_SELECTOR, '.p-commit').text.replace('\n', ' ')

    return ''' 商品名稱: %s 商品價格: %s 商品連接: %s 商品圖片: %s 評價人數: %s ''' % (
        good_name, good_price, good_link, good_img, good_commit)


def get_goods(driver, max_pages=None):
    """Scrape every product on the current result page, then follow "next".

    Each product record is printed and appended to
    '京東女士內衣數據爬去.txt' (UTF-8). Paging is done with a loop instead of
    recursion, so a long result set cannot exhaust the interpreter's
    recursion limit.

    Args:
        driver: Selenium WebDriver already showing a search-result page.
        max_pages: Optional upper bound on the number of pages to scrape.
            ``None`` (the default) means keep going until there is no usable
            next-page link.

    Raises:
        selenium.common.exceptions.NoSuchElementException: if the product
            list container or an expected product field is not found.
    """
    pages_done = 0
    while max_pages is None or pages_done < max_pages:
        _scroll_page(driver)

        # Parent element that contains all products
        good_div = driver.find_element(By.ID, 'J_goodsList')
        # All product elements on this page
        good_list = good_div.find_elements(By.CLASS_NAME, 'gl-item')

        # Open the output file once per page rather than once per product.
        with open('京東女士內衣數據爬去.txt', 'a', encoding='utf-8') as f:
            for good in good_list:
                goods = _format_good(good)
                print(goods)
                f.write(goods + '\n')
        pages_done += 1

        try:
            next_tag = driver.find_element(By.CLASS_NAME, 'pn-next')
        except NoSuchElementException:
            # No next-page link at all: this was the last page.
            break
        # NOTE(review): assumes the site marks an inactive next-page link
        # with a 'disabled' class -- confirm against the live page. Without
        # this check the loop would keep re-scraping the final page.
        if 'disabled' in (next_tag.get_attribute('class') or ''):
            break

        next_tag.click()
        time.sleep(3)


try:
    driver.get('https://www.jd.com/')
    driver.implicitly_wait(10)
    input_tag = driver.find_element(By.ID, 'key')
    input_tag.send_keys('女士內衣')
    search_button = driver.find_element(By.CLASS_NAME, 'button')
    search_button.click()
    get_goods(driver)
    # NOTE(review): long pause kept from the original; presumably it keeps
    # the browser window open for inspection after scraping -- confirm.
    time.sleep(1000)
finally:
    # quit() ends the whole WebDriver session (all windows and the driver
    # process); close() would only close the current window.
    driver.quit()