python爬蟲學習(3)_模擬登錄

1. 登錄超星慕課:用 Chrome 抓包,模擬請求 header,並提取表單中的隱藏欄位構成 params。

  主要難點是驗證碼圖片地址:在 js 中發現它的 url 由 new Date().getTime() 時間戳動態生成,Python 中對應 time.time()(注意 getTime() 的單位是毫秒,time.time() 的單位是秒)。生成驗證碼圖片 url 後,把圖片下載到本地,再手動輸入驗證碼。代碼如下:

  

#coding=utf-8
import requests
import time
from bs4 import BeautifulSoup
# Request headers sent with the login POST and the follow-up page request.
# NOTE(review): the run of spaces inside the User-Agent value looks like a
# line-wrap artifact; it is kept byte-for-byte because it is sent at runtime.
header = {
    'Referer': 'http://aust.fanya.chaoxing.com/portal',
    'Upgrade-Insecure-Requests': '1',
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36        (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36',
}

CODE_URL_PREFIX = 'http://passport2.chaoxing.com/num/code/?'  # captcha image endpoint
LOGIN_URL = 'http://passport2.chaoxing.com/login'  # URL the login form submits to
COURSES_URL = 'http://mooc12.chaoxing.com/visit/courses'  # page fetched after login
CODE_IMAGE_PATH = '/home/zhanyunwu/code.jpg'  # where the captcha image is saved


def build_code_url(now=None):
    """Return the captcha image URL for the given timestamp.

    The query string is the timestamp truncated to whole seconds.

    Args:
        now: Seconds since the epoch; defaults to ``time.time()``.

    Returns:
        The captcha URL as a string.
    """
    if now is None:
        now = time.time()
    return CODE_URL_PREFIX + str(int(now))


def build_login_params(name, password, numcode):
    """Return the form fields posted to the login URL.

    Args:
        name: Account name, sent as ``uname``.
        password: Account password, sent as ``password``.
        numcode: Captcha text typed by the user, sent as ``numcode``.

    Returns:
        A dict holding the fixed form fields plus the three user-supplied
        values.
    """
    return {
        'refer_0x001': 'http%3A%2F%2Fi.mooc.chaoxing.com%2Fspace%2Findex.shtml',
        'pid': '1',
        'pidName': '',
        'fid': '12007',
        'fidName': '安徽理工大學',
        'allowJoin': '0',
        'isCheckNumCode': '1',
        'f': '0',
        'uname': name,
        'password': password,
        'numcode': numcode,
    }


def main():
    """Log in interactively and fetch the courses page.

    Prompts for the account name and password, downloads the captcha image
    to ``CODE_IMAGE_PATH`` so it can be read by eye, prompts for the captcha
    text, posts the login form and finally requests ``COURSES_URL`` with the
    same session.

    Returns:
        The ``requests`` response for the courses page.

    Raises:
        requests.HTTPError: If the captcha or login request returns an HTTP
            error status.
    """
    import getpass  # local import: only the interactive prompt needs it

    name = raw_input("input name:")
    # getpass keeps the password from being echoed on the terminal.
    password = getpass.getpass("input password:")

    session = requests.Session()
    captcha = session.get(build_code_url())
    # Fail early instead of saving an error page as the captcha image.
    captcha.raise_for_status()
    with open(CODE_IMAGE_PATH, 'wb') as f:
        f.write(captcha.content)
    numcode = raw_input("input code:")

    login = session.post(
        LOGIN_URL,
        data=build_login_params(name, password, numcode),
        headers=header,
    )
    login.raise_for_status()
    # The session keeps the cookies set during login, so they do not have to
    # be passed again explicitly when visiting other pages.
    return session.get(COURSES_URL, headers=header)


if __name__ == "__main__":
    courses = main()

 2. 瀏覽器已成功登錄後,通過保存的 cookie 登錄豆瓣

 

#coding=utf-8
import requests
# Raw Cookie header value copied from a browser that is already logged in.
# NOTE(review): this contains live session credentials; avoid publishing or
# committing a real value.
allcookie='ll="118190"; bid=c3kC6ui9q28; _pk_id.100001.8cb4=4c5ed6a80ede35ed.1471684466.1.1471684546.1471684466.; _pk_ses.100001.8cb4=*; __utma=30149280.794301906.1471684473.1471684473.1471684473.1; __utmb=30149280.2.9.1471684473; __utmc=30149280; __utmz=30149280.1471684473.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none); __utmt=1; dbcl2="140658732:f1Vx65Uloqc"; ck=FGYf; push_noty_num=0; push_doumail_num=0; _vwo_uuid_v2=0B4AF16F37C54670B861F7D7A7C5B679|5b7205084917bf0bf6bd9380a8224a9d'

PROFILE_URL = 'http://www.douban.com/people/140658732/'  # page fetched with the cookies
OUTPUT_PATH = "/home/zhanyunwu/test.html"  # where the fetched page is saved


def parse_cookie_header(raw):
    """Convert a raw ``name=value; name=value`` cookie string into a dict.

    Whitespace around each pair and around each name is stripped, so names
    do not keep the space that follows every ``;`` separator. Each pair is
    split on its first ``=`` only, so values may themselves contain ``=``.
    Empty segments (for example after a trailing ``;``) and segments
    without ``=`` are skipped instead of raising.

    Args:
        raw: The cookie string, as copied from a browser request header.

    Returns:
        A dict mapping cookie names to their values.
    """
    cookie = {}
    for pair in raw.split(";"):
        key, sep, value = pair.strip().partition("=")
        if not sep:
            continue
        cookie[key.strip()] = value
    return cookie


def main():
    """Fetch the profile page with the saved cookies, print it and save it.

    Returns:
        The ``requests`` response for the profile page.
    """
    session = requests.Session()
    s = session.get(PROFILE_URL, cookies=parse_cookie_header(allcookie))
    text = s.content
    # Parenthesised form prints the same under Python 2 and also parses
    # under Python 3.
    print(text)
    with open(OUTPUT_PATH, "wb") as f1:
        f1.write(text)
    return s


if __name__ == "__main__":
    s = main()
相關文章
相關標籤/搜索