(一)初級爬取:css
import time

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

# Basic crawl: search JD for a keyword, append every product found on the
# first result page to jd.txt, then click the "next page" button.
driver = webdriver.Chrome(r'D:\Python\Scripts\chromedriver.exe')
num = 1
try:
    # Element lookups poll for up to 10 seconds before failing.
    driver.implicitly_wait(10)
    # Send the request to JD.
    driver.get('http://www.jd.com/')
    input_tag = driver.find_element(By.ID, 'key')
    input_tag.send_keys('墨菲定律')
    input_tag.send_keys(Keys.ENTER)
    # Fixed pause so the result page can render before it is read.
    time.sleep(3)

    good_list = driver.find_elements(By.CLASS_NAME, 'gl-item')
    # Open the output file once instead of once per product.
    with open('jd.txt', 'a', encoding='utf-8') as f:
        for good in good_list:
            # Product name
            good_name = good.find_element(By.CSS_SELECTOR, '.p-name em').text
            # Link to the product detail page
            good_url = good.find_element(
                By.CSS_SELECTOR, '.p-name a').get_attribute('href')
            # Product price
            good_price = good.find_element(By.CLASS_NAME, 'p-price').text
            # Product review text
            good_commit = good.find_element(By.CLASS_NAME, 'p-commit').text
            good_content = f'''
            num={num}
            商品名稱:{good_name}
            商品連接:{good_url}
            商品價格:{good_price}
            商品評價:{good_commit}
            \n
            '''
            print(good_content)
            f.write(good_content)
            num += 1
    print('商品寫入完畢...')
    # Only the first page is scraped; this just navigates to the next page.
    next_tag = driver.find_element(By.CLASS_NAME, 'pn-next')
    next_tag.click()
finally:
    # quit() ends the whole WebDriver session (browser and driver process);
    # close() would only close the current window.
    driver.quit()
(二)中級爬取:web
import time

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

# Intermediate crawl: search JD for a keyword, scroll the result page down,
# append every product found to jd.txt, then click the "next page" button.
driver = webdriver.Chrome(r'D:\Python\Scripts\chromedriver.exe')
num = 1
try:
    # Element lookups poll for up to 10 seconds before failing.
    driver.implicitly_wait(10)
    # Send the request to JD.
    driver.get('http://www.jd.com/')
    input_tag = driver.find_element(By.ID, 'key')
    input_tag.send_keys('墨菲定律')
    input_tag.send_keys(Keys.ENTER)
    time.sleep(5)

    # Scroll down to the 5000px mark, then pause before reading the list.
    js_code = '''
        window.scrollTo(0,5000)
    '''
    driver.execute_script(js_code)
    time.sleep(3)

    good_list = driver.find_elements(By.CLASS_NAME, 'gl-item')
    # Open the output file once instead of once per product.
    with open('jd.txt', 'a', encoding='utf-8') as f:
        for good in good_list:
            # Product name
            good_name = good.find_element(By.CSS_SELECTOR, '.p-name em').text
            # Link to the product detail page
            good_url = good.find_element(
                By.CSS_SELECTOR, '.p-name a').get_attribute('href')
            # Product price
            good_price = good.find_element(By.CLASS_NAME, 'p-price').text
            # Product review text
            good_commit = good.find_element(By.CLASS_NAME, 'p-commit').text
            good_content = f'''
            num={num}
            商品名稱:{good_name}
            商品連接:{good_url}
            商品價格:{good_price}
            商品評價:{good_commit}
            \n
            '''
            print(good_content)
            f.write(good_content)
            num += 1
    print('商品寫入完畢...')
    # Only the first page is scraped; this just navigates to the next page.
    next_tag = driver.find_element(By.CLASS_NAME, 'pn-next')
    next_tag.click()
finally:
    # quit() ends the whole WebDriver session (browser and driver process);
    # close() would only close the current window.
    driver.quit()
(三)高級爬取:chrome
import time

from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys


def get_good(driver, max_pages=None):
    """Scrape product listings page by page and append them to jd.txt.

    For each result page: wait, scroll down so more items are in view,
    read every ``gl-item`` element, print and append its details to
    ``jd.txt``, then click the "next page" button. Paging stops when the
    button is missing, is flagged as disabled, or ``max_pages`` pages have
    been processed.

    The driver is NOT closed here; the caller owns its lifetime.

    Args:
        driver: A Selenium WebDriver already showing the first result page.
        max_pages: Optional upper bound on the number of pages to scrape;
            ``None`` (the default) means keep going until the last page.
    """
    js_code = '''
        window.scrollTo(0,5000)
    '''
    num = 1  # running item counter, continuous across pages
    page = 0
    while True:
        page += 1
        time.sleep(5)
        # Scroll down to the 5000px mark, then pause before reading the list.
        driver.execute_script(js_code)
        time.sleep(5)

        good_list = driver.find_elements(By.CLASS_NAME, 'gl-item')
        # One open per page keeps finished pages on disk if a later page fails.
        with open('jd.txt', 'a', encoding='utf-8') as f:
            for good in good_list:
                # Product name
                good_name = good.find_element(
                    By.CSS_SELECTOR, '.p-name em').text
                # Link to the product detail page
                good_url = good.find_element(
                    By.CSS_SELECTOR, '.p-name a').get_attribute('href')
                # Product price
                good_price = good.find_element(By.CLASS_NAME, 'p-price').text
                # Product review text
                good_commit = good.find_element(
                    By.CLASS_NAME, 'p-commit').text
                good_content = f'''
                num={num}
                商品名稱:{good_name}
                商品連接:{good_url}
                商品價格:{good_price}
                商品評價:{good_commit}
                \n
                '''
                print(good_content)
                f.write(good_content)
                num += 1
        print('商品寫入完畢...')

        if max_pages is not None and page >= max_pages:
            break
        try:
            next_tag = driver.find_element(By.CLASS_NAME, 'pn-next')
        except NoSuchElementException:
            # No "next page" button at all: nothing more to scrape.
            break
        # NOTE(review): assumes the site marks the last page's button with a
        # 'disabled' class -- confirm against the live page.
        if 'disabled' in (next_tag.get_attribute('class') or '').split():
            break
        next_tag.click()
        time.sleep(5)


if __name__ == '__main__':
    # Create the browser only when run as a script, and only once.
    driver = webdriver.Chrome(r'D:\Python\Scripts\chromedriver.exe')
    try:
        # Element lookups poll for up to 10 seconds before failing.
        driver.implicitly_wait(10)
        # Send the request to JD.
        driver.get('http://www.jd.com/')
        input_tag = driver.find_element(By.ID, 'key')
        input_tag.send_keys('墨菲定律')
        input_tag.send_keys(Keys.ENTER)
        # Call the product-scraping function.
        get_good(driver)
    finally:
        # Single point of teardown: quit() ends the whole WebDriver session.
        driver.quit()