selenium 登陸 CSDN,requests 抓取數據:
import time

import requests
import selenium
import selenium.webdriver
import selenium.webdriver.common.keys
from selenium.webdriver.common.by import By

# Log in to CSDN through a real Chrome window, then replay the browser's
# cookies in a requests session to fetch a page with the logged-in identity.
#
# The slider captcha is NOT automated: it has to be dragged by hand during the
# pause that precedes the click on the login button.

driver = selenium.webdriver.Chrome()
try:
    driver.get("https://passport.csdn.net/login?code=public")

    # Switch the login form to its account/password tab.
    login = driver.find_element(By.LINK_TEXT, "帳號密碼登陸")
    login.click()
    time.sleep(5)

    # The two send_keys() values are placeholder text ("user name" /
    # "password"); substitute a real account before running.
    username = driver.find_element(By.ID, "all")
    username.send_keys("用戶名")
    time.sleep(3)

    password = driver.find_element(By.ID, "password-number")
    password.send_keys("密碼")
    time.sleep(5)

    # Absolute XPath to the submit button.
    # NOTE(review): tightly coupled to the page layout - confirm it still matches.
    logins = driver.find_element(
        By.XPATH,
        '//*[@id="app"]/div/div/div[1]/div[2]/div[5]/div/div[6]/div/button',
    )
    time.sleep(10)  # time window for sliding the captcha by hand
    logins.click()
    print(driver.page_source)
    time.sleep(15)  # give the site time to set its cookies

    print("開始會話")
    # A session keeps the cookies for every request made through it; the
    # context manager releases its connections when the block ends.
    with requests.Session() as req:
        cookies = driver.get_cookies()  # every cookie the browser holds
        for cookie in cookies:
            req.cookies.set(cookie["name"], cookie["value"])
        # Drop the session's default headers, as the original script did.
        req.headers.clear()
        newpage = req.get("http://my.csdn.net/")
        print("會話完成")
        print(newpage.text)  # body of the fetched page
    time.sleep(10)
finally:
    # quit() ends the whole browser session and the driver process, and the
    # finally clause guarantees that even when a step above raises.
    driver.quit()
urllib 使用已保存的 cookie 抓取數據:
#coding:utf-8
import selenium
import selenium.webdriver
import time
import lxml
import lxml.etree
import requests
import urllib.request

# Fetch http://my.csdn.net/ with urllib, authenticating with a cookie header
# that was captured beforehand, and save the raw response to csdn.txt.
#
# The original carried a disabled (triple-quoted, Python 2) snippet that logged
# in with Selenium and printed driver.get_cookies(); presumably that is how the
# cookie below was obtained. It was removed because it was dead code that
# embedded an account name and password in plain text.

print("開始會話")

headers = {
    # "Host" is left for urllib to derive from the request URL.
    "Connection": "keep-alive",
    "Upgrade-Insecure-Requests": "1",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
    "DNT": "1",
    "Referer": "http://www.csdn.net/",
    # "Accept-Encoding" stays disabled: response.read() below returns the body
    # exactly as sent, so a gzip-compressed reply would be saved compressed.
    "Accept-Language": "zh-CN,zh;q=0.8",
    # NOTE(review): hard-coded session cookie (contains an access token and an
    # e-mail address). It should come from the environment or a local file
    # that is not committed - confirm and rotate the token.
    # The value is one single header line; it is split into adjacent literals
    # only for readability and must not contain a line break.
    "cookie": (
        "uuid_tt_dd=-1734079490838081701_20171010; "
        "bdshare_firstime=1507966544895; "
        "UserName=yinghuming; "
        "UserInfo=LZTCl6p9mr%2BUgX1cEEgqwIand1mBReKkuogvIYHivh6MdgAq8c4Y4%2Fmx1uhFT%2FmWFuTu%2BCna36D%2BZ7ssW7xuzAjlIwc7Vgjd7Y7zTDJqy%2FakzOPFEGR52GRrp8sf0i9NK7p2hdvM39vRq5Y7NLJObQ%3D%3D; "
        "UserNick=%E8%8B%B1%E9%9B%84%E6%97%A0%E6%95%8C2017; "
        "AU=821; "
        "UD=%E5%A4%A9%E4%B8%8B%E8%90%A5%E9%94%80%E5%A4%A9%E4%B8%8B%E8%90%A5%E9%94%80%E5%A4%A9%E4%B8%8B%E8%90%A5%E9%94%80%E5%A4%A9%E4%B8%8B%E8%90%A5%E9%94%80%E5%A4%A9%E4%B8%8B%E8%90%A5%E9%94%80%E5%A4%A9%E4%B8%8B%E8%90%A5%E9%94%80%E5%A4%A9%E4%B8%8B%E8%90%A5%E9%94%80%E5%A4%A9%E4%B8%8B%E8%90%A5%E9%94%80; "
        "UN=yincheng0571; "
        "UE=\"yincheng01@163.com\"; "
        "BT=1508039179648; "
        "access-token=8260e0b7-a35c-419d-b4af-1f02d51c677d; "
        "Hm_lvt_6bcd52f51e9b3dce32bec4a3997715ac=1507965242,1507969974,1508038063,1508039035; "
        "Hm_lpvt_6bcd52f51e9b3dce32bec4a3997715ac=1508039041; "
        "dc_tos=oxuidd; "
        "dc_session_id=1508039034960_0.6956040327941211"
    ),
}

request = urllib.request.Request("http://my.csdn.net/", headers=headers)
# Context managers close the HTTP response and the output file even if
# reading or writing fails part-way.
with urllib.request.urlopen(request) as response:
    newpagetext = response.read()

with open("csdn.txt", "wb") as file:
    file.write(newpagetext)

print(newpagetext)
print("會話完成")
# Same total delay as the original's two back-to-back 10 s sleeps.
# NOTE(review): looks like a leftover from when a browser window was open
# (the trailing driver.close() was already disabled) - confirm it is needed.
time.sleep(20)
selenium + urllib 模擬登陸並抓取數據:
import time
import urllib.request

import selenium
import selenium.webdriver
import selenium.webdriver.common.keys
from selenium.webdriver.common.by import By

# Log in to CSDN through a real Chrome window, copy the browser's cookies into
# a Cookie header, and fetch http://my.csdn.net/ with urllib, saving the raw
# response to csdn.txt.
#
# The slider captcha is NOT automated: it has to be dragged by hand during the
# pause that precedes the click on the login button.

driver = selenium.webdriver.Chrome()
try:
    driver.get("https://passport.csdn.net/login?code=public")

    # Switch the login form to its account/password tab.
    login = driver.find_element(By.LINK_TEXT, "帳號密碼登陸")
    login.click()
    time.sleep(5)

    # The two send_keys() values are placeholder text ("user name" /
    # "password"); substitute a real account before running.
    username = driver.find_element(By.ID, "all")
    username.send_keys("用戶名")
    time.sleep(3)

    password = driver.find_element(By.ID, "password-number")
    password.send_keys("密碼")
    time.sleep(5)

    # Absolute XPath to the submit button.
    # NOTE(review): tightly coupled to the page layout - confirm it still matches.
    logins = driver.find_element(
        By.XPATH,
        '//*[@id="app"]/div/div/div[1]/div[2]/div[5]/div/div[6]/div/button',
    )
    time.sleep(10)  # time window for sliding the captcha by hand
    logins.click()
    print(driver.page_source)
    time.sleep(15)  # give the site time to set its cookies

    cookies = driver.get_cookies()  # every cookie the browser holds
    print(cookies)

    # Only each cookie's name and value go into the header; every pair is
    # terminated with ";" exactly as the original concatenation produced.
    cookie_pairs = []
    for cookie in cookies:
        print(cookie["name"], cookie["value"])
        cookie_pairs.append(str(cookie["name"]) + "=" + str(cookie["value"]) + ";")
    cookiestr = "".join(cookie_pairs)
    print("------------------------")

    print("開始會話")
    headers = {
        # "Host" is left for urllib to derive from the request URL: an
        # explicit Host header is carried over to redirected requests, where
        # it would name the wrong server.
        "Connection": "keep-alive",
        "Upgrade-Insecure-Requests": "1",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
        "DNT": "1",
        "Referer": "http://www.csdn.net/",
        # "Accept-Encoding" stays disabled: response.read() below returns the
        # body exactly as sent, so a gzip reply would be saved compressed.
        "Accept-Language": "zh-CN,zh;q=0.8",
        "cookie": cookiestr,
    }

    request = urllib.request.Request("http://my.csdn.net/", headers=headers)
    # Context managers close the HTTP response and the output file even if
    # reading or writing fails part-way.
    with urllib.request.urlopen(request) as response:
        newpagetext = response.read()

    with open("csdn.txt", "wb") as file:
        file.write(newpagetext)

    print(newpagetext)
    print("會話完成")
    time.sleep(10)
finally:
    # quit() ends the whole browser session and the driver process, and the
    # finally clause guarantees that even when a step above raises.
    driver.quit()