阿里雲提供的地理信息接口
https://datav.aliyun.com/tools/atlas/
有兩個接口, 一個是[行政編碼].json, 一個是[行政編碼]_full.json, 從這兩個接口中能夠提取到區縣一級的行政區劃信息. 提取過程中遇到的一些問題:
使用生成的行政區劃數據時, 對於香港和澳門的數據, 由於沒有level=city的這一級, 因此須要特殊處理一下. 例如在讀取province這一級的子節點時, 若是發現沒有level=city的節點, 那麼就返回一個虛擬的節點, 這個節點的各字段值和province節點本身相同, 只是level=city.
#!/usr/bin/python3
# -*- coding: UTF-8 -*-
"""Crawl the Aliyun DataV boundary API and store the regions in MySQL.

Starting from the country node (adcode 100000) the script walks the
administrative tree via two endpoints, ``<adcode>.json`` (the region itself)
and ``<adcode>_full.json`` (its direct children), and inserts every region it
meets into the ``s_region`` table.
"""
import json
import traceback

import rbcommon

# Common prefix of both endpoints, e.g.
# https://geo.datav.aliyun.com/areas/bound/140000.json
# https://geo.datav.aliyun.com/areas/bound/140000_full.json
_BOUND_URL = 'https://geo.datav.aliyun.com/areas/bound/'

# Numeric level stored in the database for each level name used by the API.
_LEVEL_CODES = {'country': 0, 'province': 1, 'city': 2, 'district': 3}


def _fetch_json(url, announce=True):
    """Request *url* and return the decoded JSON, or None on failure.

    A response of None from ``rbcommon.requestGet`` is reported as a failed
    request; a body starting with ``<?`` (an XML document instead of JSON) is
    reported as "not found". When *announce* is true the URL is printed
    before the request is made.
    """
    if announce:
        print(url)
    # NOTE(review): the meaning of the 20 and 10 arguments is defined by
    # rbcommon.requestGet (presumably timeout / retry settings) - confirm.
    echo = rbcommon.requestGet(url, 'UTF-8', 20, 10)
    if echo is None:
        print('URL request failed: ' + url)
        return None
    if echo.startswith('<?'):
        print('Not found: ' + url)
        return None
    return json.loads(echo)


def _set_level(target, level_name):
    """Store the numeric level for *level_name* in ``target['level']``.

    Unknown level names leave the key unset, exactly like the original
    if/elif chain did; ``insert`` will then log the record instead of
    storing it.
    """
    if level_name in _LEVEL_CODES:
        target['level'] = _LEVEL_CODES[level_name]


def _parent_adcode(props):
    """Return the parent adcode from a ``properties`` dict as str, or None."""
    parent = props.get('parent')
    if parent is None:
        return None
    return str(parent['adcode'])


def readRegion(adcode, parent_code=None):
    """Read one region and the list of its direct children.

    Args:
        adcode: administrative code of the region (str or int).
        parent_code: fallback parent adcode, used when the API response does
            not name a parent.

    Returns:
        A dict with the keys ``name``, ``adcode``, ``telecode``, ``level``,
        ``parent`` and ``children`` (a list of child dicts without
        ``children``), or None when the region itself cannot be fetched.
        When only the children cannot be fetched the region is returned with
        an empty ``children`` list. All adcodes in the result are strings.
    """
    # The API may deliver adcodes as int or as str. Everything is normalised
    # to str so that URL building, equality checks and the ``regions`` set
    # membership test all behave consistently.
    adcode = str(adcode)

    json_obj = _fetch_json(_BOUND_URL + adcode + '.json')
    if json_obj is None:
        return None

    props = json_obj['features'][0]['properties']
    region = {
        'name': props['name'],
        'adcode': str(props['adcode']),
        'telecode': props['telecode'],
    }
    _set_level(region, props['level'])

    parent = _parent_adcode(props)
    if parent is None and parent_code is not None:
        parent = str(parent_code)
    region['parent'] = parent

    # read sub regions
    sub_regions = []
    region['children'] = sub_regions

    json_obj = _fetch_json(_BOUND_URL + adcode + '_full.json')
    if json_obj is None:
        return region

    for sub_obj in json_obj['features']:
        sub_props = sub_obj['properties']
        sub_adcode = str(sub_props['adcode'])
        # Some regions list themselves in their own _full.json; skipping the
        # entry keeps readAllRegion from recursing into the same region.
        if sub_adcode == region['adcode']:
            continue
        sub_region = {
            'adcode': sub_adcode,
            'name': sub_props['name'],
            'telecode': None,
        }
        _set_level(sub_region, sub_props['level'])
        sub_region['parent'] = adcode
        sub_regions.append(sub_region)

    # further check if the parent adcode is correct: ask the first child who
    # its parent is and, if that differs from the adcode we queried, re-parent
    # all children accordingly.
    if sub_regions:
        json_obj = _fetch_json(
            _BOUND_URL + sub_regions[0]['adcode'] + '.json', announce=False)
        if json_obj is not None:
            real_parent = _parent_adcode(json_obj['features'][0]['properties'])
            if real_parent is not None and real_parent != sub_regions[0]['parent']:
                print('Update parent from {} to {}'.format(
                    sub_regions[0]['parent'], real_parent))
                for sub_region in sub_regions:
                    sub_region['parent'] = real_parent

    return region


def readAllRegion(parent_region):
    """Recursively read and store *parent_region* and all its descendants.

    Args:
        parent_region: dict with at least ``adcode`` and ``parent``. If the
            region cannot be re-read from the API, this dict itself is
            inserted as-is.
    """
    region = readRegion(parent_region['adcode'], parent_region['parent'])
    if region is None:
        # Could not fetch details: store what the parent listing told us.
        regions.add(parent_region['adcode'])
        insert(parent_region)
        return

    # The API may name a parent that has not been visited yet (an
    # intermediate level missing from the parent's child listing); fetch and
    # store it first.
    if region['parent'] is not None and region['parent'] not in regions:
        new_region = readRegion(region['parent'], parent_region['parent'])
        if new_region is not None:
            regions.add(new_region['adcode'])
            insert(new_region)

    regions.add(region['adcode'])
    insert(region)
    for sub_region in region['children']:
        readAllRegion(sub_region)


def insert(region):
    """Insert *region* into ``s_region``; existing ids are left untouched.

    Any error (including a region without a ``level``) is logged together
    with the offending record and does not stop the crawl.
    """
    try:
        with rbcommon.mysqlclient.cursor() as cursor:
            sql = 'INSERT IGNORE INTO `s_region` (`id`, `parent_id`, `level`, `name`, `tele_code`, `short_name`, ' \
                  '`full_name`) VALUES (%s, %s, %s, %s, %s, %s, %s)'
            cursor.execute(sql, (
                region['adcode'],
                region.get('parent'),
                region['level'],
                region['name'],
                region['telecode'],
                region['name'],
                '{}'))
        rbcommon.mysqlclient.commit()
    except Exception:
        # Deliberate best-effort: log the record and keep crawling.
        print(json.dumps(region))
        traceback.print_exc()


### MAIN ###
# adcodes (as str) of every region already written to the database
regions = set()
region = readRegion('100000')
if region is None:
    print('Failed to read the root region 100000, nothing to import')
else:
    readAllRegion(region)
其中rbcommon.mysqlclient的初始化方法:
# Shared MySQL connection; all settings come from the ``mysql`` section of
# ``cfg`` and rows are returned as dicts (DictCursor).
mysqlclient = pymysql.connect(
    **{
        option: cfg['mysql'][option]
        for option in ('host', 'port', 'user', 'password', 'db', 'charset')
    },
    cursorclass=pymysql.cursors.DictCursor
)