&&&&&python
# -*- coding: utf-8 -*-
"""Download search results of the Chinese industrial enterprise database.

The script POSTs the search form once per page, extracts the cell texts of the
result table from the returned HTML, and writes all collected rows into a
single ``.xls`` workbook.
"""
import sys
import time

import requests
from bs4 import BeautifulSoup
from xlwt import Workbook

if sys.version_info[0] == 2:
    # Python 2 only: make implicit str <-> unicode conversions use UTF-8
    # instead of ASCII. The names below do not exist on Python 3.
    reload(sys)  # noqa: F821
    sys.setdefaultencoding('utf8')

SEARCH_URL = 'http://162.105.134.150/searchCompy'
REQUEST_TIMEOUT = 30  # seconds to wait for the server before giving up
MAX_ATTEMPTS = 3      # total tries per page before the page is skipped
RETRY_DELAY = 3       # seconds to pause between tries

# Header row of the sheet; its length also bounds how many cells of each
# scraped row are written.
COLUMN_HEADERS = (
    u'序號',
    u'法人單位名稱',
    u'法人',
    u'省(自治區、直轄市)',
    u'街道',
    u'年份',
    u'組織機構代碼',
    u'主要業務活動',
    u'行業',
    u'登記註冊類型',
    u'企業控股狀況',
    u'隸屬關係',
    u'企業營業狀態',
    u'機構類型',
    u'營業收入(元)',
    u'企業規模',
    u'輕重工業',
)


def beida(page, timeout=REQUEST_TIMEOUT):
    """Fetch one page of search results and return its table rows.

    Args:
        page: Page number, sent as the ``page.currentPage`` form field.
        timeout: Seconds to wait for the server; passed to ``requests.post``.

    Returns:
        A list with one entry per ``<tr>`` after the first one. Each entry is
        the list of that row's ``<td>`` texts with all whitespace removed.

    Raises:
        requests.RequestException: The request failed or timed out.
        ValueError: The response contains no ``<div class="m-cont">``.
    """
    form = {
        'eventId': '',
        'loginName': '',
        'keyWords': '',
        'page.currentPage': page,
        'qc.coName': '',
        'qc.year': '0',
        'qc.lp': '',
        'qc.province': '',
        'qc.co39': '0',
        'qc.co42': '0',
        'qc.co_data_15': '0',
        'qc.co35': '0',
        'qc.co_data_12': '0',
        'qc.co_data_16': '0',
        'qc.co34': '0',
        'qc.active': '',
    }
    response = requests.post(SEARCH_URL, data=form, timeout=timeout)
    soup = BeautifulSoup(response.text, 'lxml')

    container = soup.find('div', class_='m-cont')
    if container is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise ValueError(
            'page %s: result container div.m-cont not found' % page)

    # The first <tr> is dropped (presumably the table's header row -- verify
    # against the live page). Slicing is safe even when there are no rows.
    table_rows = container.find_all('tr')[1:]
    return [
        [''.join(cell.text.split()) for cell in row.find_all('td')]
        for row in table_rows
    ]


def _fetch_with_retry(page, attempts=MAX_ATTEMPTS, delay=RETRY_DELAY):
    """Call ``beida(page)``, pausing and retrying on failure.

    The exception of the last attempt is re-raised. ``KeyboardInterrupt`` and
    ``SystemExit`` are not caught, so Ctrl-C still aborts the download.
    """
    for attempt in range(1, attempts + 1):
        try:
            return beida(page)
        except Exception as err:  # noqa: BLE001 - any fetch/parse failure
            if attempt == attempts:
                raise
            print('page %s: attempt %d failed (%r); retrying in %d s'
                  % (page, attempt, err, delay))
            time.sleep(delay)


def saveToExecl(start, end):
    """Download pages ``start`` .. ``end - 1`` and save them as an .xls file.

    Args:
        start: First page number to download (int or numeric string).
        end: Page number to stop before; this page itself is NOT downloaded
            (int or numeric string).

    A page that still fails after ``MAX_ATTEMPTS`` tries is skipped and
    reported, so the rows already collected are saved instead of being lost.
    The workbook is written to the current directory; its name is built from
    ``start`` and ``end`` exactly as they were passed in.

    Note: the .xls format holds at most 65,536 rows per sheet, so very large
    page ranges will make xlwt fail while writing.

    Raises:
        ValueError: ``start`` or ``end`` cannot be converted to ``int``.
    """
    # Validate the range before doing any work.
    first_page, stop_page = int(start), int(end)

    book = Workbook(encoding='utf-8')  # set the workbook encoding
    sheet = book.add_sheet('Sheet 1')
    for col, title in enumerate(COLUMN_HEADERS):
        sheet.write(0, col, title)

    print("The number of pages being downloaded now....")
    rows = []
    failed_pages = []
    for page in range(first_page, stop_page):
        try:
            rows.extend(_fetch_with_retry(page))
        except Exception as err:  # noqa: BLE001 - report and keep going
            failed_pages.append(page)
            print('page %s: skipped after %d attempts (%r)'
                  % (page, MAX_ATTEMPTS, err))
        else:
            print(page)

    # Row 0 holds the headers, so data starts at row 1. Rows shorter than the
    # header are written as far as they go; extra cells are ignored.
    width = len(COLUMN_HEADERS)
    for row_index, row in enumerate(rows, 1):
        for col, value in enumerate(row[:width]):
            sheet.write(row_index, col, value)

    file_name = u'中國工業企業數據庫%s-%s.xls' % (start, end)
    book.save(file_name)

    if failed_pages:
        print('pages that could not be downloaded: %s'
              % ', '.join(str(p) for p in failed_pages))


def main():
    """Prompt for the page range on stdin and run the download."""
    try:
        read_line = raw_input  # Python 2
    except NameError:
        read_line = input  # Python 3

    print("*********************Chinese industrial enterprise database download program*********************")
    start = read_line("please input start page number: ")
    end = read_line("please input end page number: ")
    saveToExecl(start, end)


if __name__ == "__main__":
    main()
&&&&&數據庫