I recently joined a project where route planning came up: we needed to find the shortest road distance and the shortest travel time between pairs of locations. I decided to try the Baidu Maps API and put this reference together.
Environment:
Python 3.6
Main points:
1. From the official Baidu route-planning API documentation, route planning requires longitude/latitude coordinates, so the Baidu geocoding service is used first. Geocoding converts an address or place name into longitude/latitude coordinates, which can then be used for mapping or spatial analysis. (For national-security reasons, the published coordinates are generally offset.) Documentation: http://lbsyun.baidu.com/index.php?title=webapi/guide/webservice-geocoding
2. With origin and destination coordinates (lng, lat) in hand, the goal is to use the route-planning service of the Baidu Maps developer platform to obtain the planned distance and estimated travel time between the two points. Documentation: http://lbsyun.baidu.com/index.php?title=webapi/direction-api-abroad
3. The crawl may be interrupted by server errors or by requests whose parameters fail validation; make sure to handle this (a minimal retry sketch follows this list).
4. Also note: (a) the source file must be converted to UTF-8; (b) the file paths need to be adjusted; (c) you must apply for your own AK (API key) on the developer platform.
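For point 3, a small helper that retries a failed request a few times before giving up is usually enough. This is only a minimal sketch, assuming the same urlopen-based requests used in the scripts below; the function name, retry count, and wait time are illustrative:

import json
import time
from urllib.request import urlopen

def fetch_json(url, retries=3, wait=3):
    """Open a URL and decode its JSON body, retrying a few times before giving up."""
    for attempt in range(retries):
        try:
            with urlopen(url, timeout=10) as resp:
                return json.loads(resp.read().decode('utf-8'))
        except Exception:
            time.sleep(wait)   # back off briefly, then try again
    return None                # the caller decides whether to log or skip this record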
The code is as follows (the data files are not uploaded here, but the script works with minor modifications):
# -*- coding:utf-8 -*-
# ------------------------------
# @Time   : 2019/5/9 13:32
# @Author : jonie
# @Email  :
# @File   : code_get.py
# Description:
# ------------------------------
import csv
import json
from urllib.request import urlopen, quote

# Example coordinate: [113.63095213159264, 34.74830559988335]
origin_path = 'data/賽點.csv'        # source data file
new_path = 'data/地址對應座標.txt'    # output file for the crawled coordinates

machine_data = csv.reader(open(origin_path, 'r', encoding='utf-8'))  # read the source data
for addr in machine_data:  # geocode every record
    address = addr[1]
    ak = 'FA8atAaqd1wajikD56lPqtiaNCldeya'  # replace with the AK generated for your own application
    url = 'http://api.map.baidu.com/geocoder/v2/?address='
    output = 'json'
    add = quote(address)  # the address is Chinese; quote it to avoid encoding problems
    url2 = url + add + '&output=' + output + "&ak=" + ak
    req = urlopen(url2)
    res = req.read().decode()
    temp = json.loads(res)
    lng = temp['result']['location']['lng']  # longitude
    lat = temp['result']['location']['lat']  # latitude
    lng = ("%.5f" % lng)
    lat = ("%.5f" % lat)
    list1 = [lng, lat, addr[0]]
    print('Baidu coordinates:', list1)
    with open(new_path, 'a', encoding='utf-8') as f:
        f.write(str(list1))
        f.write('\n')
    with open("data/賽點信息.csv", 'a', newline='', encoding='utf-8') as t:
        writer = csv.writer(t)   # csv writer for the coordinate file
        writer.writerow(list1)   # write one row: [lng, lat, original index]
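For reference, the same geocoding call can also be written with the requests library and a parameter dict, which handles the URL-encoding of Chinese addresses automatically. This is only a sketch assuming the same v2 geocoder endpoint and a valid ak; the function name is illustrative:

import requests

def geocode(address, ak):
    """Return (lng, lat) for one address via the Baidu v2 geocoder, or None on failure."""
    params = {'address': address, 'output': 'json', 'ak': ak}
    resp = requests.get('http://api.map.baidu.com/geocoder/v2/', params=params, timeout=10)
    data = resp.json()
    if data.get('status') == 0:                  # status 0 means success
        loc = data['result']['location']
        return round(loc['lng'], 5), round(loc['lat'], 5)
    return None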
Code for calling the Baidu Maps API to obtain the planned route distance and estimated travel time between origin and destination:
import csv
import time
import json
from urllib.request import urlopen

# Source CSV format: origin lat + origin lng + index + destination lat + destination lng
origin_path = 'data/b.csv'              # source data file
result_path = 'data/result122901.txt'   # output file for the crawled results

# Baidu Maps API endpoints
url_drive = r"http://api.map.baidu.com/direction/v2/driving"               # driving (Direction API)
url_ride = r'http://api.map.baidu.com/routematrix/v2/riding?output=json'   # riding (route matrix)
url_walk = r'http://api.map.baidu.com/routematrix/v2/walking?output=json'  # walking (route matrix)
url_bus = r'http://api.map.baidu.com/direction/v2/transit?output=json'     # transit (Direction API)
cod = r"&coord_type=bd09ll"
# Coordinate system: bd09ll (Baidu lng/lat); bd09mc (Baidu Mercator); gcj02 (national encrypted grid); wgs84 (raw GPS)
# AKs applied for on the Baidu Maps site; append more keys to the list when the quota runs out
AK = ['FA8atAaqd1wajikD56lPqtiasdfleCeyz']
# Column headers (match the fields written below)
colnames = 'device_id origin destination drive_status drive_distance(m) drive_duration(s) walk_distance(m) walk_duration(s)'
with open(result_path, 'a', encoding='utf-8') as f:
    f.write(colnames)
    f.write('\n')

address = csv.reader(open(origin_path, 'r', encoding='utf-8'))  # read the source data

n = 0      # number of finished records
akn1 = 0   # index of the AK used for driving/walking
akn2 = 0   # index of the AK used for transit
while True:
    try:  # guard against file-encoding problems and server timeouts
        for ad in address:
            if (akn1 < len(AK)) and (akn2 < len(AK)):  # is there quota left?
                mac_code = str(ad[2])  # device serial number
                try:
                    ori = str(ad[0]) + ',' + str(ad[1])  # origin
                    des = str(ad[3]) + ',' + str(ad[4])  # destination
                    ak_drive = AK[akn1]
                    ak_bus = AK[akn2]
                    ak_drive2 = r'&ak=' + ak_drive
                    ak_bus2 = r'&ak=' + ak_bus
                    ori1 = r"?origin=" + ori
                    des1 = r"&destination=" + des
                    # --- driving ---
                    tac_type = r'&tactics=11'  # driving strategy: 11 = regular route
                    # 10 no highways; 11 regular; 12 shorter distance; 13 shorter distance ignoring traffic (driving only)
                    aurl_drive = url_drive + ori1 + des1 + cod + tac_type + ak_drive2  # driving request URL
                    res_drive = urlopen(aurl_drive)       # open the URL
                    cet_drive = res_drive.read()          # read the body
                    res_drive.close()
                    result_drive = json.loads(cet_drive)  # JSON -> dict
                    status = result_drive['status']
                    print('driving status', status)
                    if status == 0:  # status 0: no error
                        m_drive = result_drive['result']["routes"][0]['distance']        # distance (m)
                        m_drive2 = float(m_drive)                                        # str -> float
                        timesec_drive = result_drive['result']["routes"][0]['duration']  # duration (s)
                        diss_drive = 'status' + str(status) + ' ' + str(m_drive) + ' ' + str(timesec_drive)
                    elif status == 302 or status == 210 or status == 201:  # 302: quota exhausted; 210: IP check failed
                        m_drive2 = 10000  # set > 5 km so the walking route is skipped
                        akn1 += 1
                        diss_drive = 'status' + str(status) + ' break break'
                    else:
                        m_drive2 = 10000  # set > 5 km so the walking route is skipped
                        diss_drive = 'status' + str(status) + ' na na'
                    try:  # this if test fails when m_drive2 was never assigned
                        if 0 < m_drive2 < 5000:  # fetch the walking route only when driving distance is under 5 km
                            aurl_walk = url_walk + ori1 + des1 + cod + ak_drive2  # walking request URL
                            res_walk = urlopen(aurl_walk)
                            cet_walk = res_walk.read()
                            result_walk = json.loads(cet_walk)
                            res_walk.close()
                            status_walk = result_walk['status']
                            if status_walk == 0:  # normal status
                                m_walk = result_walk['result']["routes"][0]['distance']     # walking distance
                                time_walk = result_walk['result']["routes"][0]['duration']  # walking duration
                                diss_walk = str(m_walk) + ' ' + str(time_walk)
                            else:  # abnormal status
                                diss_walk = 'na na'
                        else:  # driving distance over 5 km, so skip the walking route
                            diss_walk = 'na na'
                    except:  # on any error the walking fields are also set to na
                        diss_walk = 'na na'
                    # --- transit ---
                    tac_bus = r'&tactics_incity=0'
                    # in-city transfer strategy, default 0: 0 recommended; 1 fewer transfers; 2 less walking; 3 no subway; 4 fastest; 5 subway first
                    city_bus = r'&tactics_intercity=0'
                    # intercity strategy, default 0: 0 fastest; 1 earliest departure; 2 cheapest
                    city_type = r'&trans_type_intercity=2'
                    # intercity mode, default 0: 0 train first; 1 plane first; 2 coach first
                    ori2 = r"&origin=" + ori
                    des2 = r"&destination=" + des
                    aurl_bus = url_bus + ori2 + des2 + tac_bus + city_bus + city_type + ak_bus2
                    res_bus = urlopen(aurl_bus)
                    cet_bus = res_bus.read()
                    res_bus.close()
                    result_bus = json.loads(cet_bus)
                    status = result_bus['status']
                    print('transit status', status)
                    # --------------------------------------
                    # if status == 0:
                    #     rsls = result_bus['result']['routes']
                    #     if rsls == []:  # status is 0 even when there is no transit plan; the routes list is just empty
                    #         diss_bus = 'status' + str(status) + ' no transit plan'
                    #     else:
                    #         m_bus = result_bus['result']['routes'][0]['distance']   # transit distance (m)
                    #         time_bus = result_bus['result']['routes'][0]['duration']  # transit duration (s)
                    #         cost_bus = result_bus['result']['routes'][0]['price']   # fare (CNY)
                    #         diss_bus = 'status' + str(status) + ' ' + str(m_bus) + ' ' + str(time_bus) + ' ' + str(cost_bus)
                    # elif status == 302 or status == 210 or status == 201:
                    #     akn2 = akn2 + 1
                    #     diss_bus = 'status' + str(status) + ' switch AK and resume'
                    # else:  # any other status code (server error)
                    #     diss_bus = 'status' + str(status) + ' server error'
                    # -----------------------------------------------
                    # assemble and write one result row
                    diss = mac_code + ' ' + str(ori) + ' ' + str(des) + ' ' + diss_drive + ' ' + diss_walk  # + ' ' + diss_bus
                    with open(result_path, 'a', encoding='utf-8') as f:
                        f.write(diss)
                        f.write('\n')
                    n += 1
                    print('record ' + str(n) + ' done')
                except:
                    time.sleep(3)
                    diss_wrong = str(mac_code) + ' unknown error'
                    with open(result_path, 'a', encoding='utf-8') as f:
                        f.write(diss_wrong)
                        f.write('\n')
                    continue
            else:
                print('Quota exhausted!')
                break
    except:
        time.sleep(3)
        print('unknown error')
        with open(result_path, 'a', encoding='utf-8') as f:
            f.write('unknown error')
            f.write('\n')
        continue
    print('Finished running.')
    break  # break the while loop once the for loop has consumed all rows
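For a quick test of a single origin/destination pair, the driving request above can be reduced to one function using requests. This is a sketch under the same assumptions (bd09ll coordinates, tactics=11, your own ak), not part of the batch script:

import requests

def drive_route(origin, destination, ak):
    """One driving-route query; origin/destination are 'lat,lng' strings in bd09ll coordinates.
    Returns (distance_m, duration_s), or None when the API reports an error."""
    params = {
        'origin': origin,
        'destination': destination,
        'coord_type': 'bd09ll',
        'tactics': 11,       # 11 = regular route
        'ak': ak,
    }
    resp = requests.get('http://api.map.baidu.com/direction/v2/driving', params=params, timeout=10)
    data = resp.json()
    if data.get('status') != 0 or not data['result']['routes']:
        return None
    route = data['result']['routes'][0]
    return route['distance'], route['duration']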
Finally, the generated data was plotted as shown below:
Appendix:
1. Data acquisition (scraping all star-rated hotel listings for Zhengzhou from Ctrip, sorted by review score)
import requests
import random
from bs4 import BeautifulSoup
import time
import csv
import json
import re
import pandas as pd
import numpy as np

pd.set_option('display.max_columns', 10000)
pd.set_option('display.max_rows', 10000)
pd.set_option('display.max_colwidth', 10000)
pd.set_option('display.width', 1000)

# Zhengzhou star-rated hotel list endpoint
five_star_url = "http://hotels.ctrip.com/Domestic/Tool/AjaxHotelList.aspx"
filename = "data/star hotel list.csv"

def Scrap_hotel_lists():
    """
    Crawl the Zhengzhou star-rated hotel lists and save them to a CSV file.
    """
    headers = {
        "Connection": "keep-alive",
        "origin": "http://hotels.ctrip.com",
        "Host": "hotels.ctrip.com",
        "referer": "https://hotels.ctrip.com/hotel/zhengzhou559",
        "user-agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.92 Safari/537.36",
        "Content-Type": "application/x-www-form-urlencoded; charset=utf-8"
    }

    id = []
    name = []
    hotel_url = []
    address = []
    score = []

    # 8 pages of results
    for page in range(1, 8):
        data = {
            "StartTime": "2019-09-08",  # depends on the dates you want to scrape
            "DepTime": "2019-09-18",
            "RoomGuestCount": "0,1,2",
            "cityId": 559,
            "cityPY": " zhengzhou",
            "cityCode": "0371",
            "cityLat": 34.758044,
            "cityLng": 113.673121,
            "page": page,
            "star": "3",
            "orderby": 3
        }
        html = requests.post(five_star_url, headers=headers, data=data)

        # print(html.text)
        j = json.loads(html.text.replace("\洛陽", "洛陽"))
        # hotel_list = html.json()["totalMsg"]
        hotel_list = j["hotelPositionJSON"]

        for item in hotel_list:
            id.append(item['id'])
            name.append(item['name'])
            hotel_url.append(item['url'])
            address.append(item['address'])
            score.append(item['score'])

        time.sleep(random.randint(3, 5))

    hotel_array = np.array((id, name, score, hotel_url, address)).T
    list_header = ['id', 'name', 'score', 'url', 'address']
    array_header = np.array(list_header)
    hotellists = np.vstack((array_header, hotel_array))
    with open(filename, 'a', encoding="utf-8-sig", newline="") as f:
        csvwriter = csv.writer(f, dialect='excel')
        csvwriter.writerows(hotellists)


def hotel_detail(hotel_id):
    """
    Scrape the detailed room information for one specific hotel.
    """
    headers = {"Connection": "keep-alive",
               "Accept-Language": "zh-CN,zh;q=0.9",
               "Cache-Control": "max-age=0",
               "Content-Type": "application/x-www-form-urlencoded; charset=utf-8",
               "Host": "hotels.ctrip.com",
               "If-Modified-Since": "Thu, 01 Jan 1970 00:00:00 GMT",
               "Referer": "http://hotels.ctrip.com/hotel/2231618.html",
               "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) "
                             "Chrome/69.0.3497.92 Safari/537.36"
               }

    basic_url = "http://hotels.ctrip.com/Domestic/tool/AjaxHote1RoomListForDetai1.aspx?hotel="
    url = basic_url + str(hotel_id)

    r = requests.get(url, headers=headers)
    # The response is a JSON object.
    html = r.json()['html']
    soup = BeautifulSoup(html, "lxml")
    rooms = soup.findAll('td', attrs={"class": "child_name J_Col_RoomName"})

    RoomID = []
    RoomName = []
    LowPrice = []
    RoomSize = []
    RoomLevel = []
    IsAddBed = []
    BedSize = []
    CustomerNum = []

    # regex for stripping HTML tags
    baseroom_pattern = re.compile(r'<[^>]+>')

    for idx in range(len(rooms)):
        if rooms[idx].has_attr('data-baseroominfo'):
            room_info_str = rooms[idx]['data-baseroominfo']
            room_info_json = json.loads(room_info_str)
            RoomID.append(str(room_info_json["RoomID"]))
            RoomName.append(room_info_json["RoomName"])
            LowPrice.append(room_info_json["LowPrice"])

            baseroom_info = room_info_json["BaseRoomInfo"]  # an HTML string
            remove_tag = baseroom_pattern.sub("", baseroom_info)
            RoomDetailInfo = remove_tag.split("|")
            if len(RoomDetailInfo) == 4:
                # the "extra bed" field is sometimes missing; pad it so the indices below line up
                RoomDetailInfo.insert(3, None)

            RoomSize.append(RoomDetailInfo[0])
            RoomLevel.append(RoomDetailInfo[1])
            BedSize.append(RoomDetailInfo[2])
            IsAddBed.append(RoomDetailInfo[3])
            CustomerNum.append(RoomDetailInfo[4])
        else:
            continue

    RoomInfo = np.array((RoomID, RoomName, LowPrice, RoomSize, RoomLevel, BedSize, IsAddBed, CustomerNum)).T
    # build a DataFrame from the collected fields
    column_name = ['RoomID', 'RoomName', 'LowPrice', 'RoomSize', 'RoomLevel', 'BedSize', 'IsAddBed', 'CustomerNum']
    df = pd.DataFrame(data=RoomInfo, columns=column_name)
    print(df)


if __name__ == "__main__":

    # 1. Scrape the Zhengzhou star-rated hotel list
    Scrap_hotel_lists()

    # 2. Scrape the detailed room information for a chosen hotel
    df = pd.read_csv(filename, encoding='utf8')
    print("1. Zhengzhou Star Hotel Lists")
    print(df)
    hotelID = df["id"]
    print('\n')

    while True:
        print("2.1 To see the detailed information of a hotel, input its index in the DataFrame.")
        print("2.2 To quit, input 'q'.")

        print("Please input the parameter: ")
        input_param = input()
        if input_param.isnumeric():
            hotel_index = int(input_param)
            if 0 <= hotel_index <= 170:
                print("3. The detailed information of the hotel:")
                hotel_detail(hotelID[hotel_index])
            else:
                print('Hotel index out of range!')
                print('Remember: 0 <= hotel index <= 170')
                print('Please input again.')
                continue
        elif input_param == 'q':
            print('See you later!')
            break
        else:
            print('Invalid input!')
            print('\n')
            continue
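hotel_detail only prints the room DataFrame; if you also want to keep it, the frame can be written out as well. A minimal sketch, with the output path purely illustrative:

# Hypothetical helper: persist one hotel's room details instead of only printing them.
def save_rooms(df, hotel_id, out_dir='data'):
    df.to_csv('{}/rooms_{}.csv'.format(out_dir, hotel_id), index=False, encoding='utf-8-sig')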
2. Drawing a word cloud of the hotel information from the generated data
from pyecharts import WordCloud
import random

name1 = hotel_list2              # list of hotel names scraped earlier (see the sketch below)
random_list = [296, 630, 。。。]  # word weights; the elided values are the rest of the list
# The random integer list can also be generated with the three lines below:
# for i in range(len(name1)):
#     # generate len(name1) random integers in [300, 800]
#     random_list.append(random.randint(300, 800))
# print('Generated random integer list:\n', random_list)
value = random_list

wordcloud = WordCloud(width=1300, height=800)
wordcloud.add("Hotel information", name1, value, word_size_range=[10, 20], shape='pentagon')
wordcloud.show_config()
wordcloud.render()
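hotel_list2 above is assumed to be the list of hotel names scraped in the appendix. A minimal sketch of building the name and value inputs from the hotel-list CSV written by Scrap_hotel_lists() (column names as in that script):

import random
import pandas as pd

# Assumed input: the CSV produced by Scrap_hotel_lists() in the appendix.
df = pd.read_csv('data/star hotel list.csv', encoding='utf-8-sig')
hotel_list2 = df['name'].tolist()                              # word-cloud labels
random_list = [random.randint(300, 800) for _ in hotel_list2]  # illustrative weights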
Note: the above is purely for fun and learning.