DataFrame與shp文件相互轉換

時間 2019-11-10

原文鏈接

由於習慣了使用pandas的DataFrame數據結構，同時pandas作爲一個方便計算和表操作的數據結構具有十分顯著的優點，甚至很多時候DataFrame可以當作excel來使用；而用python操作gis的shp文件時很不順暢，不太符合使用習慣，故寫了DataFrame與arcgis地理文件相互轉換的函數，這樣處理起來可以節約大量的思考時間。

Shp轉DataFrame：

import codecs
import glob
import re

import arcpy
import pandas as pd

def Shp2dataframe(path):
    '''Read an arcpy table or feature class into a pandas DataFrame.

    Every field reported by arcpy.ListFields(path) becomes one column, in
    the same order, and every row yielded by arcpy.SearchCursor(path)
    becomes one record.

    Input:
    path -- dataset path handed to arcpy.ListFields and arcpy.SearchCursor

    Output:
    pandas DataFrame whose columns are the field names
    '''
    column_names = [f.name for f in arcpy.ListFields(path)]
    # A cursor instance can be iterated only once, so every row is collected
    # in a single pass. The Shape field yields a geometry object rather than
    # a plain value.
    records = [
        [record.getValue(name) for name in column_names]
        for record in arcpy.SearchCursor(path)
    ]
    return pd.DataFrame(records, columns=column_names)

DataFrame轉Shp：

DataFrame轉Shp採用了模板形式，通過模板創建字段、座標系等，可以更加快速地構建字段。

# Convert a pandas table (for example one produced by Shp2dataframe) into a
# feature class; `template` is the dataset the new feature class is modelled on.
def Dataframe2ShpTemplate(df,outpath,geoType,template):
    '''
    Function:
    write the rows of a pandas DataFrame into a newly created feature class

    Input:
    df -- pandas DataFrame; it must contain a 'Shape' column holding the
          geometry objects. A 'SHAPE@' column (a copy of 'Shape') is added
          to df in place, as the previous implementation did.
    outpath -- output path, '/'-separated; the text after the last '/' is the
               feature class name, everything before it is the workspace
    geoType -- geometry type, eg: 'POINT','POLYLINE','POLYGON','MULTIPOINT'
    template -- template dataset passed to CreateFeatureclass_management

    Raises:
    KeyError if df has no 'Shape' column.
    '''
    # Split on the LAST '/' only. Replacing the name everywhere in the path
    # would also damage a directory component that happens to contain it.
    out_path, sep, out_name = outpath.rpartition('/')
    out_path += sep
    # The template provides the properties (fields, coordinate system) of the
    # newly created feature class.
    arcpy.CreateFeatureclass_management(out_path, out_name, geoType, template)

    # The geometry has to be supplied under the 'SHAPE@' token to be written.
    # This is loop-invariant, so it is done once instead of once per row.
    df['SHAPE@'] = df['Shape']
    fields = list(df.columns)

    # One cursor for all rows; opening a cursor per row leaked every cursor
    # but the last. The finally clause releases it even when an insert fails,
    # and an empty DataFrame no longer hits an undefined `cursor`.
    cursor = arcpy.da.InsertCursor(outpath, fields)
    try:
        for row in df.index:
            cursor.insertRow([df[field][row] for field in fields])
    finally:
        del cursor
    print('Pandas to shp finish!')

實例應用：

寫一段根據GPS公交點txt文件構建shp數據的代碼，代碼如下：

# For each supported filetype: the geometry type of the output feature class,
# the file-name suffix to drop from the line name, and the DataFrame keys that
# are filled for every record.
_FILETYPE_INFO = {
    'point': ('POINT', u'point.txt', ('name', 'x', 'y', 'line', 'Shape')),
    'line': ('POLYLINE', u'line.txt', ('name', 'Shape')),
}


def _line_name(filename, suffix):
    '''Return the bare file name without `suffix`, encoded as UTF-8.'''
    # Decode first: a GBK trail byte can equal the backslash byte, so path
    # separators must only be looked for in decoded text.
    text = filename.decode('gbk') if isinstance(filename, bytes) else filename
    base = text.replace(u'\\', u'/').rsplit(u'/', 1)[-1]
    # str.strip() removes a *set of characters* from both ends, which eats
    # leading letters of the name; remove the exact suffix instead.
    if base.endswith(suffix):
        base = base[:-len(suffix)]
    return base.encode('utf-8')


def readDataFile(filetype,filename,savefile):
    '''
    Function:
    build a feature class from a GBK-encoded, space-separated text file and
    return the intermediate DataFrame

    Input:
    filetype -- 'point' (each line: name x y, one point feature per line) or
                'line' (each line: x y, all lines form one polyline feature)
    filename -- path of the text file; the first run of digits in it is used
                in the output name 'bus<digits><filetype>'
    savefile -- workspace path (ending with '/') the feature class is written to

    Output:
    the pandas DataFrame that was handed to Dataframe2ShpTemplate

    Raises:
    ValueError if filetype is not supported.
    IndexError if filename contains no digits or a data line has too few fields.
    '''
    if filetype not in _FILETYPE_INFO:
        # Previously this surfaced much later as an undefined `df`.
        raise ValueError('unsupported filetype: %r' % (filetype,))
    geometry_type, suffix, keys = _FILETYPE_INFO[filetype]

    # Read with the 'gbk' codec so every line is unicode, and close the file
    # before any of the arcpy work starts.
    with codecs.open(filename,encoding='gbk') as datafile:
        pointData = [line.strip('\r\n') for line in datafile.readlines()]
    # Blank lines (for example a trailing newline) carry no record.
    pointData = [line for line in pointData if line.strip()]

    outputFileName = 'bus'+re.findall('[0-9]*[0-9]',filename)[0]+filetype
    # Keep appending '_1' until the name is unused; a single pass over the
    # children could still end on a name that already exists.
    existing = set(child.name for child in arcpy.Describe(savefile).children)
    while outputFileName in existing:
        outputFileName = outputFileName + '_1'
    print('output path is %s'%(savefile+outputFileName))

    # Template dataset for the output feature class.
    template = u'./dealing/temple.gdb/%s'%filetype
    columns = Shp2dataframe(template).columns
    linename = _line_name(filename, suffix)

    if filetype == 'point':
        records = []
        for line in pointData:
            row = line.split(' ')
            records.append({
                'name': row[0],
                'x': row[1],
                'y': row[2],
                'line': linename,
                # Coordinates are converted to float, as the line branch does.
                'Shape': arcpy.PointGeometry(
                    arcpy.Point(float(row[1]), float(row[2]))),
            })
    else:
        # Collect every vertex, then build a single polyline feature.
        pointList = []
        for line in pointData:
            coord = line.split(' ')
            pointList.append(arcpy.Point(float(coord[0]), float(coord[1])))
        records = [{
            'name': linename,
            'Shape': arcpy.Polyline(arcpy.Array(pointList)),
        }]

    # Build the frame in one step instead of enlarging it cell by cell.
    # Template fields come first (unfilled ones stay NaN); keys the template
    # lacks are appended so no supplied value is silently dropped.
    extras = [key for key in keys if key not in columns] if records else []
    df = pd.DataFrame(records).reindex(columns=list(columns) + extras)

    # Pass the real geometry type; an empty string leaves the choice to the
    # tool's default.
    Dataframe2ShpTemplate(df,savefile+outputFileName,geometry_type,template)
    return df

-------sugar---------------------sugar--------------------sugar-------------------sugar----------------sugar----------

# Find every file in the working directory whose name ends with point.txt
# or line.txt.
ROAD_GDB = u'dealing/廣州市道路網.gdb/'

pointfiles = glob.glob('./dealing/*point.txt')
polylinefiles = glob.glob('./dealing/*line.txt')

# Convert each text file into a feature class inside the geodatabase.
for pf in pointfiles:
    print(pf)
    readDataFile('point', pf, ROAD_GDB)

for pl in polylinefiles:
    print(pl)
    df = readDataFile('line', pl, ROAD_GDB)


# Merge every dataset whose name contains 'line' into one feature class.
lineshp = arcpy.Describe(ROAD_GDB)
linelist = [
    ROAD_GDB + child.name
    for child in lineshp.children
    if 'line' in child.name
]
arcpy.Merge_management(linelist, ROAD_GDB + u'0allLine')

Kanonpy

http://my.oschina.net/Kanonpy/admin/edit-blog?blog=425633