Numpy是Python科學計算的經常使用庫,也在機器學習領域具備重要的做用。Numpy主要實現矩陣運算功能,這裏的教程將地理空間數據集與Numpy實現相互轉換,從而能夠將Python的更多通用庫用於地理空間分析,包括矢量和柵格數據的轉換。進一步,能夠利用Pandas和GeoPandas的相關功能。python
from iobjectspy import (datasetvector_to_numpy_array, numpy_array_to_datasetvector, datasetraster_to_numpy_array, numpy_array_to_datasetraster) import numpy as np import os
# 設置示例數據路徑 #example_data_dir = '' example_data_dir = '/home/jovyan/data/smdata/' # 設置結果輸出路徑 out_dir = os.path.join(example_data_dir, 'out')
if not os.path.exists(out_dir): os.makedirs(out_dir) def vector_to_numpy_test(): """讀取矢量數據集 Town_P 到 numpy 數組中""" narray1 = datasetvector_to_numpy_array(os.path.join(example_data_dir, 'example_data.udb/Landuse_R'), export_spatial=True) print(narray1.dtype) print(narray1[:10]) narray = datasetvector_to_numpy_array(os.path.join(example_data_dir, 'example_data.udb/Town_P'), export_spatial=True) if narray is None: print('讀取矢量數據集到 numpy 數組失敗') else: print('ndarray.ndim : ' + str(narray.ndim)) print('ndarray.dtype : ' + str(narray.dtype)) print(narray[:10]) print(narray['SmX'][:10]) print('讀取矢量數據集到 numpy 數組成功') xy_array = np.c_[narray['SmX'], narray['SmY']][:10] print(xy_array.ndim) print(xy_array.dtype) print(xy_array) try: # 使用 hdbscan 作層次聚類,並將結果在 matplotlib 顯示 % matplotlib inline from hdbscan import HDBSCAN import matplotlib.pyplot as plt xy_c = np.c_[narray['SmX'], narray['SmY']] hdb = HDBSCAN(min_cluster_size=10).fit(xy_c) hdb_labels = hdb.labels_ n_clusters_hdb_ = len(set(hdb_labels)) - (1 if -1 in hdb_labels else 0) hdb_unique_labels = set(hdb_labels) hdb_colors = plt.cm.Spectral(np.linspace(0, 1, len(hdb_unique_labels))) fig = plt.figure(figsize=plt.figaspect(1)) hdb_axis = fig.add_subplot('111') for k, col in zip(hdb_unique_labels, hdb_colors): if k == -1: col = 'k' hdb_axis.plot(xy_c[hdb_labels == k, 0], xy_c[hdb_labels == k, 1], 'o', markerfacecolor=col, markeredgecolor='k', markersize=6) hdb_axis.set_title('HDBSCAN\nEstimated number of clusters: %d' % n_clusters_hdb_) plt.show() except ImportError: pass def numpy_to_vector_test(): """寫入數據到矢量數據集中""" narray = np.empty(10, dtype=[('ID', 'int32'), ('X', 'float64'), ('Y', 'float64'), ('NAME', 'U10')]) narray[0] = 1, 116.380351, 39.93393099, '什剎海' narray[1] = 2, 116.365305, 39.89622499, '廣安門內' narray[2] = 3, 116.427342, 39.89467499, '崇文門外' narray[3] = 4, 116.490881, 39.96567299, '酒仙橋' narray[4] = 5, 116.447486, 39.93767799, '三里屯' narray[5] = 6, 116.347435, 40.08078599, '回龍觀' narray[6] = 7, 116.407196, 39.83895899, '大紅門' narray[7] = 8, 116.396915, 39.88371499, '天橋' narray[8] = 9, 116.334522, 40.03594199, '清河' narray[9] = 10, 116.03008, 39.87852799, '潭柘寺' print(narray) result = numpy_array_to_datasetvector(narray, os.path.join(out_dir, 'out_numpy_array.udb'), x_col='X', y_col='Y') if result is not None: if isinstance(result, str): if isinstance(result, str): print('從 numpy 數組中寫入數據到矢量數據集 %s 成功' % result) else: print('從 numpy 數組中寫入數據到矢量數據集 %s 成功' % result.name) else: print('從 numpy 數組中寫入數據到矢量數據集失敗') def raster_to_numpy_test(): """從柵格數據 DEM 中讀取數據到 numpy 數組中""" narray = datasetraster_to_numpy_array(os.path.join(example_data_dir, 'example_data.udb/DEM')) if narray is None: print('讀取柵格數據集到 numpy 數組失敗') else: print('ndarray.ndim : ' + str(narray.ndim)) print('ndarray.dtype : ' + str(narray.dtype)) print(narray) print('讀取柵格數據集到 numpy 數組成功') numpy_array_to_datasetraster(narray, 0.001, 0.001, os.path.join(out_dir, 'out_numpy_array.udb'), as_grid=False) def numpy_to_raster_test(): """從 numpy 的數組二進制文件中加載數據,寫道柵格數據集中""" narray = np.load(os.path.join(example_data_dir, 'dem.npy')) print(narray) result = numpy_array_to_datasetraster(narray, 0.001, 0.001, os.path.join(example_data_dir, 'out_numpy_array.udb'), as_grid=True) if result is not None: if isinstance(result, str): print('從 numpy 數組中寫入數據到柵格數據集 %s 成功' % result) else: print('從 numpy 數組中寫入數據到柵格數據集 %s 成功' % result.name) else: print('從 numpy 數組中寫入數據到柵格數據集失敗')
if __name__ == '__main__': # 讀取矢量數據集到 numpy 數組成功 vector_to_numpy_test() # 從 numpy 數組中寫入數據到矢量數據集中成功 numpy_to_vector_test() # 讀取柵格數據集到 numpy 數組成功 raster_to_numpy_test() # 從 numpy 數組中寫入數據到柵格數據集中成功 numpy_to_raster_test()
[('LANDTYPE', '<U4'), ('Area', '<f4'), ('Area_1', '<i2'), ('SmX', '<f8'), ('SmY', '<f8'), ('SmPerimeter', '<f8'), ('SmArea', '<f8')] [('用材林', 132., 132, 116.47779337, 40.87251703, 0.75917921, 1.40894401e-02) ('用材林', 97., 97, 116.6540059 , 40.94696274, 0.4945153 , 1.03534475e-02) ('灌叢', 36., 36, 116.58451795, 40.98712283, 0.25655489, 3.89923745e-03) ('灌叢', 36., 36, 116.89611418, 40.76792703, 0.59237713, 3.81791878e-03) ('用材林', 1., 1, 116.37943683, 40.91435429, 0.03874328, 7.08450886e-05) ('灌叢', 126., 126, 116.49117083, 40.78302383, 0.53664074, 1.34577856e-02) ('用材林', 83., 83, 116.69943237, 40.74456848, 0.39696365, 8.83225363e-03) ('用材林', 128., 128, 116.8129727 , 40.69116153, 0.56949408, 1.35877743e-02) ('用材林', 29., 29, 116.24543769, 40.71076092, 0.30082509, 3.07221559e-03) ('灌叢', 467., 467, 116.43290772, 40.50875567, 1.91745792, 4.95537433e-02)] ndarray.ndim : 1 ndarray.dtype : [('NAME', '<U9'), ('SmX', '<f8'), ('SmY', '<f8')] [('百尺竿鄉', 115.917748, 39.53525099) ('什剎海', 116.380351, 39.93393099) ('月壇', 116.344828, 39.91476099) ('廣安門內', 116.365305, 39.89622499) ('牛街', 116.36388 , 39.88680299) ('崇文門外', 116.427342, 39.89467499) ('永定門外', 116.402249, 39.86559299) ('崔各莊', 116.515447, 39.99966499) ('小關', 116.411727, 39.97737199) ('潘家園', 116.467911, 39.87179299)] [115.917748 116.380351 116.344828 116.365305 116.36388 116.427342 116.402249 116.515447 116.411727 116.467911] 讀取矢量數據集到 numpy 數組成功 2 float64 [[115.917748 39.53525099] [116.380351 39.93393099] [116.344828 39.91476099] [116.365305 39.89622499] [116.36388 39.88680299] [116.427342 39.89467499] [116.402249 39.86559299] [116.515447 39.99966499] [116.411727 39.97737199] [116.467911 39.87179299]] [( 1, 116.380351, 39.93393099, '什剎海') ( 2, 116.365305, 39.89622499, '廣安門內') ( 3, 116.427342, 39.89467499, '崇文門外') ( 4, 116.490881, 39.96567299, '酒仙橋') ( 5, 116.447486, 39.93767799, '三里屯') ( 6, 116.347435, 40.08078599, '回龍觀') ( 7, 116.407196, 39.83895899, '大紅門') ( 8, 116.396915, 39.88371499, '天橋') ( 9, 116.334522, 40.03594199, '清河') (10, 116.03008 , 39.87852799, '潭柘寺')] 從 numpy 數組中寫入數據到矢量數據集 NewDataset 成功 DEM ndarray.ndim : 2 ndarray.dtype : int16 [[ 360 357 353 ... 1373 1320 1265] [ 349 353 355 ... 1354 1292 1259] [ 353 358 356 ... 1375 1341 1319] ... [ 741 756 745 ... 819 836 860] [ 700 713 718 ... 821 848 872] [ 684 696 696 ... 828 854 880]] 讀取柵格數據集到 numpy 數組成功 [[ 360 357 353 ... 1373 1320 1265] [ 349 353 355 ... 1354 1292 1259] [ 353 358 356 ... 1375 1341 1319] ... [ 741 756 745 ... 819 836 860] [ 700 713 718 ... 821 848 872] [ 684 696 696 ... 828 854 880]] 從 numpy 數組中寫入數據到柵格數據集 NewDataset 成功