#!/usr/bin/python
# -*- coding: UTF-8 -*-
# 拉取微信全部服務商 (Fetch all WeChat Pay service providers)

from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import time
import sys
import pymysql
from selenium.common.exceptions import NoSuchElementException

# Python 2 only: reload(sys) makes sys.setdefaultencoding available again so
# the implicit str<->unicode conversion uses UTF-8.  The script relies on this
# when it concatenates unicode keywords with byte-string literals for printing.
reload(sys)
sys.setdefaultencoding('utf-8')

# Firefox session shared by the whole script; the geckodriver path is
# hard-coded to the author's machine and must be adjusted elsewhere.
driver = webdriver.Firefox(executable_path="/Users/chanming/Desktop/geckodriver")
# Earlier hard-coded version of the label-text -> column-name mapping, kept for
# reference.  The live mapping below is filled in at runtime instead.
# map={'拓展地域:':'dis','聯 系 人 :':'userName','電  話:':'phone','郵  箱:':'email','地  址:':'address','所屬行業:':'industry'}
# Label text (as rendered on the page) -> column name; populated from the
# first scraped entry.  NOTE: this name shadows the builtin map().
map={}
# True until `map` has been populated.
flag=True
# Search keywords fed one by one into the service-provider search box.
# Values and order are unchanged; only the line wrapping was repaired (the
# original split the `u` prefix from "金昌" across two lines, which is a
# syntax error).
# NOTE(review): u"西安" appears twice (first entry and again further down), so
# it is searched and stored twice -- confirm whether that is intended.
# NOTE(review): u"重啓" and u"焦做" look like misspellings of 重慶 / 焦作 --
# confirm before changing, since they are used verbatim as search terms.
keyWords = [
    u"西安",
    u"肇慶", u"汕尾", u"茂名", u"深圳", u"陽江", u"潮州", u"韶關", u"梅州", u"河源",
    u"清遠", u"東莞", u"雲浮", u"揭陽", u"廣州", u"中山", u"東沙羣島",
    u"濮陽", u"洛陽", u"三門峽", u"鄭州", u"許昌", u"南陽", u"漯河", u"濟源", u"平頂山",
    u"信陽", u"焦做", u"安陽", u"駐馬店", u"商丘", u"新鄉", u"鶴壁", u"周口", u"開封",
    u"烏海", u"巴彥淖爾", u"呼倫貝爾", u"鄂爾多斯", u"包頭", u"呼和浩特", u"通遼", u"赤峯",
    u"阿拉善盟", u"興安盟", u"錫林郭勒盟", u"烏蘭察布",
    u"大興安嶺地區", u"七臺河", u"大慶", u"鶴崗", u"伊春", u"綏化", u"佳木斯", u"齊齊哈爾",
    u"雞西", u"雙鴨山", u"哈爾濱", u"黑河", u"牡丹江",
    u"北屯", u"雙河", u"鐵門關", u"博爾塔拉蒙古自治州", u"和田地區", u"塔城地區", u"昆玉",
    u"可克達拉", u"石河子", u"阿勒泰地區", u"克拉瑪依", u"昌吉回族自治州", u"五家渠",
    u"巴音郭楞蒙古自治州", u"阿拉爾", u"圖木舒克", u"喀什地區", u"伊犁哈薩克自治州",
    u"烏魯木齊", u"阿克蘇地區", u"克孜勒蘇柯爾克孜自治州", u"哈密", u"吐魯番",
    u"十堰", u"宜昌", u"孝感", u"武漢", u"恩施土家族苗族自治州", u"天門", u"黃岡", u"潛江",
    u"襄陽", u"荊門", u"仙桃", u"神農架林區", u"隨州", u"荊州", u"咸寧", u"黃石", u"鄂州",
    u"大連", u"葫蘆島", u"錦州", u"丹東", u"撫順", u"瀋陽", u"鞍山", u"鐵嶺", u"遼陽",
    u"盤錦", u"營口", u"朝陽", u"阜新", u"本溪",
    u"威海", u"煙臺", u"濱州", u"臨沂", u"萊蕪", u"淄博", u"青島", u"聊城", u"德州",
    u"日照", u"菏澤", u"濰坊", u"濟南", u"泰安", u"東營", u"濟寧", u"棗莊",
    u"商洛", u"西安", u"漢中", u"安康", u"榆林", u"銅川", u"咸陽", u"寶雞", u"延安", u"渭南",
    u"銅仁", u"六盤水", u"遵義", u"安順", u"畢節", u"貴陽",
    u"重啓",
    u"昌都", u"拉薩", u"那曲", u"日喀則", u"山南", u"林芝",
    u"阜陽", u"淮北", u"銅陵", u"蚌埠", u"馬鞍山", u"池州", u"亳州", u"滁州", u"安慶",
    u"黃山", u"宣城", u"蕪湖", u"六安", u"淮南", u"合肥", u"宿州",
    u"寧德", u"福州", u"龍巖", u"莆田", u"泉州", u"三明", u"廈門", u"漳州", u"南平",
    u"岳陽", u"衡陽", u"婁底", u"湘潭", u"益陽", u"長沙", u"常德", u"懷化", u"邵陽",
    u"張家界", u"株洲", u"永州", u"郴州",
    u"瓊海", u"東方", u"文昌", u"五指山", u"三沙", u"三亞", u"萬寧", u"儋州", u"海口",
    u"宿遷", u"連雲港", u"揚州", u"南京", u"南通", u"無錫", u"鎮江", u"淮安", u"泰州",
    u"徐州", u"蘇州", u"常州", u"鹽城",
    u"海東", u"西寧",
    u"欽州", u"桂林", u"百色", u"北海", u"貴港", u"河池", u"柳州", u"南寧", u"來賓",
    u"崇左", u"賀州", u"防城港", u"梧州", u"玉林",
    u"固原", u"中衛", u"銀川", u"石嘴山", u"吳忠",
    u"景德鎮", u"九江", u"撫州", u"上饒", u"新餘", u"贛州", u"鷹潭", u"南昌", u"吉安",
    u"萍鄉", u"宜春",
    u"寧波", u"台州", u"嘉興", u"舟山", u"溫州", u"衢州", u"金華", u"麗水", u"杭州",
    u"紹興", u"湖州",
    u"邢臺", u"邯鄲", u"唐山", u"承德", u"張家口", u"廊坊", u"滄州", u"衡水", u"秦皇島",
    u"保定", u"石家莊",
    u"深水埗區", u"西貢區", u"元朗區", u"油尖旺區", u"灣仔區", u"屯門區", u"黃大仙區",
    u"東區", u"中西區", u"大埔區", u"九龍城區", u"沙田區", u"南區", u"觀塘區", u"北區",
    u"離島區", u"荃灣區", u"葵青區",
    u"陽泉", u"太原", u"臨汾", u"大同", u"晉城", u"忻州", u"長治", u"運城", u"朔州",
    u"晉中", u"呂梁",
    u"風順堂區", u"花地瑪堂區", u"聖方濟各堂區", u"花王堂區", u"路凼填海區", u"大堂區",
    u"嘉模堂區", u"望德堂區",
    u"蘭州", u"金昌", u"嘉峪關", u"酒泉", u"平涼", u"白銀", u"張掖", u"甘南藏族自治州",
    u"臨夏回族自治州", u"隴南", u"天水", u"武威", u"定西", u"慶陽",
    u"廣元", u"南充", u"達州", u"眉山", u"德陽", u"遂寧", u"巴中", u"廣安", u"資陽",
    u"成都", u"綿陽", u"內江", u"宜賓", u"自貢", u"雅安", u"攀枝花", u"樂山",
    u"甘孜藏族自治州", u"瀘州", u"阿壩藏族羌族自治州", u"涼山彝族自治州",
    u"昭通", u"曲靖", u"紅河哈尼族彝族自治州", u"麗江", u"西雙版納傣族自治州", u"保山",
    u"文山壯族苗族自治州", u"大理白族自治州", u"怒江傈僳族自治州", u"迪慶藏族自治州",
    u"玉溪", u"普洱", u"昆明", u"楚雄彝族自治州", u"德宏傣族景頗族自治州", u"臨滄",
    u"長春", u"遼源", u"吉林", u"白城", u"松原", u"四平", u"延邊朝鮮族自治州", u"白山",
    u"通化",
]
# Connection to the `youxia` database on the local MySQL server.
# NOTE(review): host, user and password are hard-coded here -- consider moving
# them to configuration before sharing or deploying this script.
db = pymysql.connect(host = '127.0.0.1', port = 3306, user = 'json', passwd = '123456', db = 'youxia', charset="utf8")
# Single cursor reused for every INSERT issued by the script.
cursor = db.cursor()
# Parameterized INSERT into wx_pay_company; the eight placeholders are filled
# in the column order listed in the statement.
sql = 'insert into wx_pay_company(dis,userName,phone,email,address,industry,title,searchword) values(%s,%s,%s,%s,%s,%s,%s,%s)'
# Columns filled from a result entry, in the order the INSERT statement in
# `sql` expects them (title and searchword follow).  A complete entry is
# assumed to list its labels in this same order, which is how the
# label -> column mapping in `map` is learned.
_FIELD_ORDER = ('dis', 'userName', 'phone', 'email', 'address', 'industry')


def _scrape_result_page():
    """Return one dict per company in the result list currently displayed.

    Each dict holds 'title' plus whichever columns of _FIELD_ORDER the entry
    provides.  The module-level `map` (label text -> column name) is filled
    from the first entry that carries all six labels; `flag` records whether
    that has happened yet.
    """
    global flag
    companies = []
    for entry in driver.find_elements_by_css_selector('#searchResultList dl'):
        try:
            title = entry.find_element_by_tag_name('dt').text
        except NoSuchElementException:
            # A <dl> without a <dt> is not a company entry; skip it.
            continue

        labels = [node.text for node in entry.find_elements_by_class_name('lbl')]
        values = [node.text for node in entry.find_elements_by_class_name('ele')]

        # Learn the mapping only from an entry with every label present, so a
        # short first entry can neither crash nor mis-assign columns.
        if flag and len(labels) >= len(_FIELD_ORDER):
            map.update(zip(labels, _FIELD_ORDER))
            flag = False

        company = {'title': title}
        # zip() stops at the shorter list, so an entry with fewer values than
        # labels no longer raises IndexError.
        for label, value in zip(labels, values):
            field = map.get(label)
            if field is None:
                print('skipping unmapped label: ' + label)
                continue
            company[field] = value
        companies.append(company)
    return companies


def _collect_companies(keyword):
    """Search for `keyword` and return the company dicts from every result page.

    Returns an empty list when the site reports no results.
    """
    driver.get("https://pay.weixin.qq.com/index.php/partner/public/search")
    time.sleep(3)
    search_box = driver.find_element_by_id('searchPortalText')
    search_box.clear()
    search_box.send_keys(keyword)
    driver.find_element_by_id("searchPortalSubmit").click()
    time.sleep(1)

    # The "empty result" banner carries the `hide` class when there ARE
    # results; if no hidden banner exists, the search came back empty.
    try:
        driver.find_element_by_css_selector('.page-msg.mini.page-empty.hide')
    except NoSuchElementException:
        print(keyword+'沒有查詢結果')
        return []

    # The pager is hidden (class contains "hide") when everything fits on one
    # page.  A missing class attribute is treated as "not hidden".
    pager_class = driver.find_element_by_id("service_provider_query_page").get_attribute('class') or ''
    if 'hide' in pager_class:
        print('結果單頁')
        return _scrape_result_page()

    print('結果多頁')
    total_pages = int(driver.find_element_by_name("totalpage").text)
    companies = []
    for page in range(1, total_pages + 1):
        print(page)
        goto_input = driver.find_element_by_css_selector('.goto-area input')
        goto_input.clear()
        goto_input.send_keys(str(page))
        driver.find_element_by_css_selector('.goto-area a').click()
        time.sleep(3)
        companies.extend(_scrape_result_page())
    return companies


def _store_companies(companies, keyword):
    """Insert each company dict into wx_pay_company, tagged with `keyword`.

    Columns the entry did not provide are stored as empty strings.  The
    transaction is committed once after all rows of the keyword are queued.
    """
    for company in companies:
        print('插入數據庫')
        row = tuple(company.get(field, '') for field in _FIELD_ORDER)
        cursor.execute(sql, row + (company['title'], keyword))
    db.commit()


# Run the whole crawl inside try/finally so the browser and the database
# connection are released even when a page or query fails midway.
try:
    for keyword in keyWords:
        print(keyword)
        _store_companies(_collect_companies(keyword), keyword)
finally:
    try:
        cursor.close()
        db.close()
    finally:
        # quit() ends the WebDriver session and its driver process; close()
        # would only close the current window.
        driver.quit()

print('運行結束')
# 相關文章
# 相關標籤/搜索