我的 sister 想要我幫她爬一些試題。總共有 80 套,她不想手動一套一套點開,所以就由我來處理。這個網站比較簡單,因此沒有費很大的力氣。期間參考了一系列網站,可惜都沒有記錄下來。
#!/usr/bin/env python
# coding=utf-8
"""Save the answer pages of the zjzx.zjnu.edu.cn practice exams as HTML and PDF.

For each of the 4 subjects in ``myMap`` and each of its 20 exams, the script
loads the exam page, presses the three "submit and show answers" buttons in
turn, and stores the final page as ``<Subject>_exam<N>.html`` plus a PDF
rendering ``<Subject>_exam<N>.pdf`` in the current directory.

The site is an ASP.NET Web Forms page: every POST has to echo back the
``__VIEWSTATE`` and ``__EVENTVALIDATION`` hidden fields taken from the page
that was received last.
"""
import io
import re
import sys
import time

try:
    import urllib2
except ImportError:  # Python 3: Request/urlopen live in urllib.request
    import urllib.request as urllib2

import pdfkit
import requests

if sys.version_info[0] == 2:
    # Legacy Python 2 behaviour kept from the original script: implicit
    # str <-> unicode conversions use UTF-8 instead of ASCII.
    reload(sys)  # noqa: F821 -- builtin on Python 2 only
    sys.setdefaultencoding("utf-8")

BASE_URL = 'http://zjzx.zjnu.edu.cn/test/Default.aspx'
EXAMS_PER_SUBJECT = 20
REQUEST_TIMEOUT = 30  # seconds; a stalled request must not hang the whole run
PAUSE_SECONDS = 3     # delay between consecutive form submissions

# Patterns for the two hidden inputs; group 1 is the field value.
_VIEWSTATE_RE = re.compile(
    r'<input type="hidden" name="__VIEWSTATE" id="__VIEWSTATE" value="(.*?)" />',
    re.I)
_EVENTVALIDATION_RE = re.compile(
    r'input type="hidden" name="__EVENTVALIDATION" id="__EVENTVALIDATION" value="(.*?)" />',
    re.I)

# (form field name, value posted for it), in the order they are submitted.
# NOTE(review): the labels are posted verbatim as the button values; confirm
# they match the buttons on the live page.
ANSWER_BUTTONS = (
    ('Button1', '提交併查看單選題答案'),
    ('Button2', '提交併查看多選題答案'),
    ('Button3', '提交併查看判斷題答案'),
)

# Base form fields; cid/pid are overwritten for every exam.
data = {
    'cid': '1',
    'pid': '5',
}

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36',
}

# wkhtmltopdf options passed through pdfkit.
# Add 'orientation': 'Landscape' for landscape pages, or
# 'footer-right': '[page]' to print page numbers.
options = {
    'page-size': 'A4',
    'margin-top': '0mm',
    'margin-right': '0mm',
    'margin-bottom': '0mm',
    'margin-left': '0mm',
    'encoding': "UTF-8",
    'no-outline': None,
}

# Output file name prefix per subject; the list index + 1 is the site's cid.
myMap = ["UniversityPsychology", "HigherEducationRegulations", "HigherEducation", "TeacherEthics"]


def get_hiddenvalue_string(myStr):
    """Extract the two ASP.NET hidden form fields from a page.

    Args:
        myStr: HTML of the page, as text or as raw bytes.

    Returns:
        A ``(viewstate, eventvalidation)`` tuple holding the ``value``
        attribute of the first ``__VIEWSTATE`` and ``__EVENTVALIDATION``
        hidden inputs found in the page.

    Raises:
        IndexError: if either hidden input is missing from the page.
    """
    if isinstance(myStr, bytes):
        # Only the ASCII markup of the hidden inputs is matched, so
        # undecodable bytes elsewhere in the page can safely be replaced.
        myStr = myStr.decode('utf-8', 'replace')
    viewstate = _VIEWSTATE_RE.search(myStr)
    validation = _EVENTVALIDATION_RE.search(myStr)
    missing = [
        name
        for name, match in (('__VIEWSTATE', viewstate),
                            ('__EVENTVALIDATION', validation))
        if match is None
    ]
    if missing:
        raise IndexError(
            'hidden field(s) not found in page: ' + ', '.join(missing))
    return viewstate.group(1), validation.group(1)


def get_hiddenvalue(url):
    """Fetch ``url`` with a plain GET and return its hidden form fields.

    Args:
        url: address of the exam page.

    Returns:
        The ``(viewstate, eventvalidation)`` tuple produced by
        :func:`get_hiddenvalue_string` for the downloaded page.

    Raises:
        IndexError: if either hidden input is missing from the page.
    """
    request = urllib2.Request(url)
    response = urllib2.urlopen(request, timeout=REQUEST_TIMEOUT)
    try:
        page = response.read()
    finally:
        # The Python 2 response object is not a context manager.
        response.close()
    return get_hiddenvalue_string(page)


def _submit(url, form, button, label):
    """POST ``form`` with one extra button field and return the response text."""
    payload = dict(form)
    payload[button] = label
    response = requests.post(
        url=url, data=payload, headers=headers, timeout=REQUEST_TIMEOUT)
    return response.text


def _fetch_answer_page(url, cid, pid):
    """Press the three answer buttons of one exam and return the final HTML."""
    form = dict(data)
    form.update({'cid': cid, 'pid': pid, 'drop1': '1', 'drop2': '1'})
    form['__VIEWSTATE'], form['__EVENTVALIDATION'] = get_hiddenvalue(url)

    html = None
    last_step = len(ANSWER_BUTTONS) - 1
    for step, (button, label) in enumerate(ANSWER_BUTTONS):
        html = _submit(url, form, button, label)
        if step < last_step:
            time.sleep(PAUSE_SECONDS)
            # The next POST must carry the hidden fields of the page that
            # was just returned, not those of the initial page.
            form['__VIEWSTATE'], form['__EVENTVALIDATION'] = (
                get_hiddenvalue_string(html))
    return html


def main():
    """Download every exam of every subject and write its HTML and PDF."""
    confg = pdfkit.configuration(wkhtmltopdf='/usr/local/bin/wkhtmltopdf')
    for cid, subject in enumerate(myMap, 1):
        for pid in range(1, EXAMS_PER_SUBJECT + 1):
            url = '{0}?cid={1}&pid={2}'.format(BASE_URL, cid, pid)
            stem = '{0}_exam{1}'.format(subject, pid)
            html = _fetch_answer_page(url, str(cid), str(pid))
            # 'w', not 'a': a re-run must replace the saved page instead of
            # appending a second copy that would also end up in the PDF.
            with io.open(stem + '.html', 'w', encoding='utf-8') as page_file:
                page_file.write(html)
            pdfkit.from_string(
                html, stem + '.pdf', configuration=confg, options=options)


if __name__ == '__main__':
    main()