#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""Scrape the WeChat Pay partner search page by keyword and store hits in MySQL.

For every keyword in ``keyWords`` the script opens the search page in
Firefox, submits the keyword, walks every result page and inserts one row
per listed company into the ``wx_pay_company`` table.

The script targets Python 2 with the Selenium 3 ``find_element_by_*`` API
(as the original did); only constructs that behave the same on Python 2 are
used.  Run it as a script -- importing the module does not start a scrape.
"""
import sys
import time

import pymysql
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.keys import Keys

if sys.version_info[0] == 2:
    # Python 2 only: make implicit str <-> unicode conversions use UTF-8 so
    # that mixing byte-string literals with scraped unicode text (and printing
    # it) does not raise UnicodeDecodeError/UnicodeEncodeError.
    reload(sys)  # noqa: F821 -- builtin on Python 2
    sys.setdefaultencoding('utf-8')

SEARCH_URL = "https://pay.weixin.qq.com/index.php/partner/public/search"

# NOTE(review): machine-specific path -- consider making it configurable.
GECKODRIVER_PATH = "/Users/chanming/Desktop/geckodriver"

# Column names filled from the labelled properties of a result entry, in the
# order the labels appear on the page.
FIELD_NAMES = ('dis', 'userName', 'phone', 'email', 'address', 'industry')

# An earlier revision hard-coded the label texts; kept for reference only.
# The mapping is learned from the page at run time instead.
# map={'拓展地域:':'dis','聯 系 人 :':'userName','電 話:':'phone','郵 箱:':'email','地 址:':'address','所屬行業:':'industry'}

# NOTE(review): u"重啓" and u"焦做" look like mistyped place names -- confirm
# the intended spelling with the data owner before changing them.
keyWords = [
    u"西安", u"肇慶", u"汕尾", u"茂名", u"深圳", u"陽江", u"潮州", u"韶關",
    u"梅州", u"河源", u"清遠", u"東莞", u"雲浮", u"揭陽", u"廣州", u"中山",
    u"東沙羣島",
    u"濮陽", u"洛陽", u"三門峽", u"鄭州", u"許昌", u"南陽", u"漯河", u"濟源",
    u"平頂山", u"信陽", u"焦做", u"安陽", u"駐馬店", u"商丘", u"新鄉", u"鶴壁",
    u"周口", u"開封",
    u"烏海", u"巴彥淖爾", u"呼倫貝爾", u"鄂爾多斯", u"包頭", u"呼和浩特",
    u"通遼", u"赤峯", u"阿拉善盟", u"興安盟", u"錫林郭勒盟", u"烏蘭察布",
    u"大興安嶺地區", u"七臺河", u"大慶", u"鶴崗", u"伊春", u"綏化", u"佳木斯",
    u"齊齊哈爾", u"雞西", u"雙鴨山", u"哈爾濱", u"黑河", u"牡丹江",
    u"北屯", u"雙河", u"鐵門關", u"博爾塔拉蒙古自治州", u"和田地區",
    u"塔城地區", u"昆玉", u"可克達拉", u"石河子", u"阿勒泰地區", u"克拉瑪依",
    u"昌吉回族自治州", u"五家渠", u"巴音郭楞蒙古自治州", u"阿拉爾",
    u"圖木舒克", u"喀什地區", u"伊犁哈薩克自治州", u"烏魯木齊", u"阿克蘇地區",
    u"克孜勒蘇柯爾克孜自治州", u"哈密", u"吐魯番",
    u"十堰", u"宜昌", u"孝感", u"武漢", u"恩施土家族苗族自治州", u"天門",
    u"黃岡", u"潛江", u"襄陽", u"荊門", u"仙桃", u"神農架林區", u"隨州",
    u"荊州", u"咸寧", u"黃石", u"鄂州",
    u"大連", u"葫蘆島", u"錦州", u"丹東", u"撫順", u"瀋陽", u"鞍山", u"鐵嶺",
    u"遼陽", u"盤錦", u"營口", u"朝陽", u"阜新", u"本溪",
    u"威海", u"煙臺", u"濱州", u"臨沂", u"萊蕪", u"淄博", u"青島", u"聊城",
    u"德州", u"日照", u"菏澤", u"濰坊", u"濟南", u"泰安", u"東營", u"濟寧",
    u"棗莊",
    u"商洛", u"西安", u"漢中", u"安康", u"榆林", u"銅川", u"咸陽", u"寶雞",
    u"延安", u"渭南",
    u"銅仁", u"六盤水", u"遵義", u"安順", u"畢節", u"貴陽",
    u"重啓",
    u"昌都", u"拉薩", u"那曲", u"日喀則", u"山南", u"林芝",
    u"阜陽", u"淮北", u"銅陵", u"蚌埠", u"馬鞍山", u"池州", u"亳州", u"滁州",
    u"安慶", u"黃山", u"宣城", u"蕪湖", u"六安", u"淮南", u"合肥", u"宿州",
    u"寧德", u"福州", u"龍巖", u"莆田", u"泉州", u"三明", u"廈門", u"漳州",
    u"南平",
    u"岳陽", u"衡陽", u"婁底", u"湘潭", u"益陽", u"長沙", u"常德", u"懷化",
    u"邵陽", u"張家界", u"株洲", u"永州", u"郴州",
    u"瓊海", u"東方", u"文昌", u"五指山", u"三沙", u"三亞", u"萬寧", u"儋州",
    u"海口",
    u"宿遷", u"連雲港", u"揚州", u"南京", u"南通", u"無錫", u"鎮江", u"淮安",
    u"泰州", u"徐州", u"蘇州", u"常州", u"鹽城",
    u"海東", u"西寧",
    u"欽州", u"桂林", u"百色", u"北海", u"貴港", u"河池", u"柳州", u"南寧",
    u"來賓", u"崇左", u"賀州", u"防城港", u"梧州", u"玉林",
    u"固原", u"中衛", u"銀川", u"石嘴山", u"吳忠",
    u"景德鎮", u"九江", u"撫州", u"上饒", u"新餘", u"贛州", u"鷹潭", u"南昌",
    u"吉安", u"萍鄉", u"宜春",
    u"寧波", u"台州", u"嘉興", u"舟山", u"溫州", u"衢州", u"金華", u"麗水",
    u"杭州", u"紹興", u"湖州",
    u"邢臺", u"邯鄲", u"唐山", u"承德", u"張家口", u"廊坊", u"滄州", u"衡水",
    u"秦皇島", u"保定", u"石家莊",
    u"深水埗區", u"西貢區", u"元朗區", u"油尖旺區", u"灣仔區", u"屯門區",
    u"黃大仙區", u"東區", u"中西區", u"大埔區", u"九龍城區", u"沙田區",
    u"南區", u"觀塘區", u"北區", u"離島區", u"荃灣區", u"葵青區",
    u"陽泉", u"太原", u"臨汾", u"大同", u"晉城", u"忻州", u"長治", u"運城",
    u"朔州", u"晉中", u"呂梁",
    u"風順堂區", u"花地瑪堂區", u"聖方濟各堂區", u"花王堂區", u"路凼填海區",
    u"大堂區", u"嘉模堂區", u"望德堂區",
    u"蘭州", u"金昌", u"嘉峪關", u"酒泉", u"平涼", u"白銀", u"張掖",
    u"甘南藏族自治州", u"臨夏回族自治州", u"隴南", u"天水", u"武威", u"定西",
    u"慶陽",
    u"廣元", u"南充", u"達州", u"眉山", u"德陽", u"遂寧", u"巴中", u"廣安",
    u"資陽", u"成都", u"綿陽", u"內江", u"宜賓", u"自貢", u"雅安", u"攀枝花",
    u"樂山", u"甘孜藏族自治州", u"瀘州", u"阿壩藏族羌族自治州",
    u"涼山彝族自治州",
    u"昭通", u"曲靖", u"紅河哈尼族彝族自治州", u"麗江", u"西雙版納傣族自治州",
    u"保山", u"文山壯族苗族自治州", u"大理白族自治州", u"怒江傈僳族自治州",
    u"迪慶藏族自治州", u"玉溪", u"普洱", u"昆明", u"楚雄彝族自治州",
    u"德宏傣族景頗族自治州", u"臨滄",
    u"長春", u"遼源", u"吉林", u"白城", u"松原", u"四平", u"延邊朝鮮族自治州",
    u"白山", u"通化",
]

sql = 'insert into wx_pay_company(dis,userName,phone,email,address,industry,title,searchword) values(%s,%s,%s,%s,%s,%s,%s,%s)'


def _unique_in_order(items):
    """Return *items* without repeats, keeping the first occurrence of each."""
    seen = set()
    unique = []
    for item in items:
        if item not in seen:
            seen.add(item)
            unique.append(item)
    return unique


def _parse_company(entry, label_to_field):
    """Turn one ``<dl>`` result entry into a dict, or None if it has no ``<dt>``.

    ``label_to_field`` maps a label text to a column name.  It starts empty
    and is filled once, from the first entry that shows at least
    ``len(FIELD_NAMES)`` labels, by pairing the labels in page order with
    ``FIELD_NAMES``.  Every column in ``FIELD_NAMES`` defaults to ``''``.
    """
    try:
        title = entry.find_element_by_tag_name('dt').text
    except WebDriverException:
        # Entries without a readable title are skipped (best effort).
        return None

    labels = [node.text for node in entry.find_elements_by_class_name('lbl')]
    values = [node.text for node in entry.find_elements_by_class_name('ele')]

    if not label_to_field and len(labels) >= len(FIELD_NAMES):
        for label, field in zip(labels, FIELD_NAMES):
            label_to_field[label] = field

    company = dict.fromkeys(FIELD_NAMES, '')
    company['title'] = title
    # zip() stops at the shorter list, so a missing value node cannot raise.
    for label, value in zip(labels, values):
        field = label_to_field.get(label)
        if field is None:
            print('unmapped label: ' + label)
            continue
        company[field] = value
    return company


def _collect_page(driver, label_to_field):
    """Return the company dicts for the result page currently displayed."""
    companies = []
    for entry in driver.find_elements_by_css_selector('#searchResultList dl'):
        company = _parse_company(entry, label_to_field)
        if company is not None:
            companies.append(company)
    return companies


def _search(driver, keyword, label_to_field):
    """Submit *keyword* on the search page and return every company found.

    Returns an empty list when the page reports no results.
    """
    driver.get(SEARCH_URL)
    time.sleep(3)  # fixed wait for the page to load
    search_box = driver.find_element_by_id('searchPortalText')
    search_box.clear()
    search_box.send_keys(keyword)
    driver.find_element_by_id("searchPortalSubmit").click()
    time.sleep(1)  # fixed wait for the result list

    # Results exist only while the "empty" banner carries the `hide` class.
    try:
        driver.find_element_by_css_selector('.page-msg.mini.page-empty.hide')
    except NoSuchElementException:
        print(keyword + '沒有查詢結果')
        return []

    pager = driver.find_element_by_id("service_provider_query_page")
    if 'hide' in (pager.get_attribute('class') or ''):
        # Pager hidden: everything fits on the page already shown.
        print('結果單頁')
        return _collect_page(driver, label_to_field)

    print('結果多頁')
    total_pages = int(driver.find_element_by_name("totalpage").text)
    companies = []
    for page in range(1, total_pages + 1):
        print(page)
        goto_input = driver.find_element_by_css_selector('.goto-area input')
        goto_input.clear()
        goto_input.send_keys(str(page))
        driver.find_element_by_css_selector('.goto-area a').click()
        time.sleep(3)  # fixed wait for the requested page to render
        companies.extend(_collect_page(driver, label_to_field))
    return companies


def main():
    """Scrape every keyword and insert the results into MySQL.

    The browser, connection and cursor are always released, even when a
    keyword fails part-way through.
    """
    label_to_field = {}
    driver = webdriver.Firefox(executable_path=GECKODRIVER_PATH)
    try:
        # NOTE(review): credentials are hard-coded; consider loading them
        # from the environment or a config file.
        db = pymysql.connect(host='127.0.0.1', port=3306, user='json',
                             passwd='123456', db='youxia', charset="utf8")
        try:
            cursor = db.cursor()
            try:
                # keyWords lists some names twice; searching a name again
                # would insert the same rows a second time.
                for keyword in _unique_in_order(keyWords):
                    print(keyword)
                    for company in _search(driver, keyword, label_to_field):
                        print('插入數據庫')
                        cursor.execute(sql, (
                            company['dis'], company['userName'],
                            company['phone'], company['email'],
                            company['address'], company['industry'],
                            company['title'], keyword,
                        ))
                    # One commit per keyword: its rows land together.
                    db.commit()
            finally:
                cursor.close()
        finally:
            db.close()
    finally:
        # quit() ends the WebDriver session, not just the current window.
        driver.quit()
    print('運行結束')


if __name__ == "__main__":
    main()