OSM是OpenStreetMap的開源數據格式,採用xml存儲。這裏將其轉爲json後,能夠加載到Spark/Hadoop等系統中進一步處理,也能夠直接導入GIS軟件中使用。
提取OpenStreetMap的osm文件(xml格式),轉爲json並保存到三個文件。
json
採用迭代方式(iterparse)逐個元素處理,處理完即釋放,內存消耗少,適合大文件的處理,速度很快。
函數
from pprint import *
import json
import time

# NOTE(review): `etree` and `xmltodict` are used below but are not imported in
# the visible source. The elements are accessed through `getprevious()` and
# `getparent()`, so `etree` is presumably `lxml.etree` -- confirm, and import
# both names at the top of the file.


def process_element(elem):
    """Write one parsed XML element as a single JSON line.

    The element is serialised back to XML, converted to a dict with
    ``xmltodict`` (attributes without prefix, text under the ``""`` key) and
    the value stored under its tag name is dumped as JSON followed by a
    newline.

    The output goes to the module-level file objects ``fnode``, ``fway`` or
    ``frelation`` depending on ``elem.tag``; these must already be open when
    this function is called. Elements with any other tag are ignored.

    Args:
        elem: An element whose ``tag`` is inspected and which can be passed
            to ``etree.tostring``.
    """
    elem_data = etree.tostring(elem)
    elem_dict = xmltodict.parse(elem_data, attr_prefix="", cdata_key="")

    tag = elem.tag
    if tag == "node":
        fnode.write(json.dumps(elem_dict["node"]) + "\n")
    elif tag == "way":
        fway.write(json.dumps(elem_dict["way"]) + "\n")
    elif tag == "relation":
        frelation.write(json.dumps(elem_dict["relation"]) + "\n")


def fast_iter(context, func, maxline):
    """Feed every element yielded by ``context`` to ``func``, freeing as it goes.

    After an element has been handled it is cleared and all of its preceding
    siblings are removed from the parent, so already-processed elements do
    not pile up while a large document is being streamed.

    Args:
        context: Iterable of ``(event, element)`` pairs, e.g. the result of
            ``etree.iterparse``.
        func: Callable invoked once per element (``process_element`` here).
        maxline: Sampling limit for debugging. When greater than 0, each
            element is also printed and iteration stops after exactly
            ``maxline`` elements have been handled. 0 (or less) means no
            limit.

    Any exception raised while iterating or handling an element is caught,
    reported on stdout with a timestamp (using the module-level
    ``ISOTIMEFORMAT``), and ends the iteration; it is not re-raised.
    """
    placement = 0
    try:
        for event, elem in context:
            placement += 1
            if maxline > 0:
                # Sampling mode: show the raw element that is being converted.
                print(etree.tostring(elem))

            func(elem)

            # Release the element and every sibling that precedes it.
            elem.clear()
            while elem.getprevious() is not None:
                del elem.getparent()[0]

            # Check the limit only AFTER handling the element, so that
            # maxline=N converts N elements (the previous check ran before
            # `func` and therefore converted only N-1).
            if 0 < maxline <= placement:
                break
    except Exception as ex:
        print(time.strftime(ISOTIMEFORMAT), ", Error:", ex)
    # Drop this function's own reference to the iterator.
    del context
進行轉換:
oop
import time

# Path of the OSM (XML) file to convert; adjust as needed.
osmfile = '../data/muenchen.osm'
# Sampling limit for debugging: maximum number of elements to convert.
# 0 converts the whole file.
maxline = 0
# strftime pattern used for the timestamps in the progress/error messages.
ISOTIMEFORMAT = "%Y-%m-%d %X"

print(time.strftime(ISOTIMEFORMAT), ", Process osm XML...", osmfile, " =>MaxLine:", maxline)

# One JSON-lines output file per element type, written next to the input.
# `fnode`, `fway` and `frelation` stay module-level names because
# process_element() writes to them; the `with` statement guarantees they are
# closed even if parsing fails part-way through.
with open(osmfile + "_node.json", "w+") as fnode, \
        open(osmfile + "_way.json", "w+") as fway, \
        open(osmfile + "_relation.json", "w+") as frelation:
    # NOTE(review): the `tag=` filter is not part of the standard library's
    # xml.etree iterparse, so `etree` is presumably lxml.etree -- confirm
    # that it is imported at the top of the file.
    context = etree.iterparse(osmfile, tag=["node", "way", "relation"])
    fast_iter(context, process_element, maxline)

print(time.strftime(ISOTIMEFORMAT), ", OSM to JSON, Finished.")