# -*- coding: UTF-8 -*-
# encoding=utf8
from __future__ import print_function
from bs4 import BeautifulSoup
import pymysql
import re
from urllib import urlopen
import requests
import json
import math
import collections
import sys
def parseBaseInfo():
reload(sys)
sys.setdefaultencoding("utf-8")
headers = {'Content-Type': 'application/json'}
pageInfoFile= open("./dangdang.json", 'r')
pageInfo=json.load(pageInfoFile)
f=open('/Users/chanming/Desktop/c.txt','a')
for x in xrange(2,346):
url="http://e.dangdang.com/media/api.go?action=getPcChapterInfo&epubID=1900072881&consumeType=1&platform=3&deviceType=Android&deviceVersion=5.0.0&channelId=70000&platformSource=DDDS-P&fromPaltform=ds_android&deviceSerialNo=html5&clientVersionNo=5.8.4&token=pc_b5e9f2bd585a6885ddad9d08bf7901292b1d1cf4c1934b742f5f4bd37dee5ff7&chapterID={}&pageIndex={}&locationIndex=5&wordSize=2&style=2&autoBuy=0&chapterIndex="
url=url.format(pageInfo['pagenum'+str(x)]['chapterID'],pageInfo['pagenum'+str(x)]['pageIndex'])
result= requests.get(url,headers)
resultjson=result.json()
chapterInfo=resultjson['data']['chapterInfo']
chapterInfojson=json.loads(chapterInfo)
snippet=chapterInfojson['snippet']
soup = BeautifulSoup(snippet, 'html.parser')
rows={}
for span in soup.find_all('span'):
# print(span.get_text())
text=span.get_text()
style=span.attrs['style']
left= re.findall("\d+",style)[0]
bottom=re.findall("\d+",style)[1]
left=int(left)
bottom=int(bottom)
try:
rows[bottom]
except:
rows[bottom]=[]
rows[bottom].append((left,text))
for key in sorted(rows.keys(),reverse=True):
row=rows[key]
row.sort(key=lambda k: k[0])
for cell in row:
print(cell[1],end='')
f.write(cell[1])
print()
f.write('\n')
f.write('\n')
f.write('\n')
f.write('\n')
f.close()
parseBaseInfo()
# def test():
# for x in xrange(2,346):
# print(x)
# test()
12903