1.進入淘寶,主頁:https://www.taobao.com/
2.搜索:美食,點擊搜索
3.獲得當前搜索結果商品的:price(價格),location(銷售地),shop(商店名稱),image(圖片),title(商品名稱),product_link(商品鏈接)
4.切換點擊到最後一頁為止
代碼:
"""Crawl Taobao search results for "美食" with headless Chrome.

The script opens the Taobao home page, searches for the keyword, prints the
product fields found on every result page, and walks the pager until the last
page reported by the site.
"""
import re

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# Extra command-line arguments handed to the driver service.
# NOTE(review): these look like PhantomJS-style flags - confirm chromedriver
# actually honours them.
SERVICE_ARGS = ['--load-images=false', '--disk-cache=false']

# Module-level globals shared by every function below.
# Run the browser without a visible window.
options = Options()
options.add_argument("--headless")
driver = webdriver.Chrome(service_args=SERVICE_ARGS, options=options)
# Optionally fix the browser window size:
# driver.set_window_size(width=1360, height=768)
# First argument is the driver, second is the timeout in seconds.
wait = WebDriverWait(driver, 5)


def next_page(page):
    """Jump to result page ``page`` through the pager form and parse it.

    Args:
        page: 1-based page number to switch to.

    Raises:
        selenium.common.exceptions.TimeoutException: if the pager does not
            show ``page`` as the active page within the wait timeout.
    """
    print("正在切換===", page, "頁")
    # ``find_element(By..., ...)`` is used instead of the
    # ``find_element_by_*`` helpers, which newer Selenium releases removed.
    page_box = driver.find_element(
        By.CSS_SELECTOR, "#mainsrp-pager div div div div.form input")
    # Clear whatever page number is currently in the box.
    page_box.clear()
    # Type the requested page number.
    page_box.send_keys(page)
    submit = driver.find_element(
        By.CSS_SELECTOR,
        "#mainsrp-pager div div div div.form span.btn.J_Submit")
    submit.click()
    # Confirm the switch succeeded: the active pager item must show the
    # requested page number.
    wait.until(
        EC.text_to_be_present_in_element(
            (By.CSS_SELECTOR,
             "#mainsrp-pager > div > div > div > ul > li.item.active"),
            str(page)))
    # Parse the data of the page we just switched to.
    get_product_info(page)


def get_product_info(page):
    """Parse the currently loaded result page and print one dict per product.

    Args:
        page: page number, used only for the progress message.

    Raises:
        selenium.common.exceptions.TimeoutException: if no product item
            appears within the wait timeout.
        IndexError, KeyError: if a product item lacks one of the expected
            elements or attributes.
    """
    print("當前正在解析========", page, "頁")
    # Make sure at least one product item is present before reading the DOM.
    wait.until(EC.presence_of_element_located(
        (By.CSS_SELECTOR, "#mainsrp-itemlist .items .item")))
    # Snapshot of the current page.
    html = driver.page_source
    soup = BeautifulSoup(html, "lxml")
    # All product item tags on the current page.
    product_lists = soup.select("#mainsrp-itemlist .items .item")
    for product in product_lists:
        print("---" * 100)
        # NOTE(review): the price field is not extracted here.
        item = {
            # Place the product ships from.
            "location": product.select(".location")[0].text,
            # Shop name.
            "shopname": product.select(".shopname")[0].text,
            # Product title.
            "title": product.select(".title .J_ClickStat")[0].text,
            # Product image URL; the key spelling is kept as-is so the
            # printed output stays unchanged.
            "iamge": product.select("img")[0]["data-src"],
            # Link to the product page.
            "data_link": product.select(
                ".pic-link.J_ClickStat.J_ItemPicA")[0]["href"],
        }
        print(item)


def get_total_page():
    """Search for the keyword, parse page 1 and return the total page count.

    Returns:
        str: the first run of digits found in the pager's "total" text.

    Raises:
        ValueError: if the pager text contains no digits.
        selenium.common.exceptions.TimeoutException: if the search box or
            the pager total does not appear within the wait timeout.
    """
    driver.get("https://www.taobao.com/")
    # Wait for the search box to appear.
    search_box = wait.until(EC.presence_of_element_located((By.ID, "q")))
    search_box.clear()
    search_box.send_keys("美食")
    # Click the search button; this loads result page 1.
    driver.find_element(By.CSS_SELECTOR, ".btn-search").click()
    # Wait for the pager total instead of reading it immediately: the
    # results page may not have rendered yet right after the click.
    total = wait.until(
        EC.presence_of_element_located((By.CLASS_NAME, "total"))).text
    # print(total)
    match = re.search(r'\d+', total)
    if match is None:
        raise ValueError(
            "could not find a page count in pager text: {!r}".format(total))
    total_num = match.group()
    # Parse the data of the current (first) page.
    get_product_info(1)
    return total_num


if __name__ == "__main__":
    try:
        toto_page = get_total_page()
        print("總頁數===", toto_page)
        for page in range(2, int(toto_page) + 1):
            print(page)
            next_page(page)
    finally:
        # Always shut the browser down, even when a page fails to load or
        # parse, so no headless Chrome process is left behind.
        driver.quit()
運行效果: