轉換NOAA天氣數據文件「.dly」爲Pandas DataFrame
獲取數據:ftp://ftp.ncdc.noaa.gov/pub/data/ghcn/daily
In [1]:
import matplotlib.pyplot as pltimport pandas as pdimport numpy as npimport reimport ftplib% matplotlib notebook
In [2]:
# download data from FTP
def download_file_from_ftp(FTP_SERVER, FTP_PATH, FILENAME):
    """Download one file from an FTP server to a local file of the same name.

    Connects to ``FTP_SERVER``, logs in without supplying credentials,
    changes to ``FTP_PATH`` and retrieves ``FILENAME`` in binary mode.

    The payload is first written to ``FILENAME + '.part'`` and only moved onto
    ``FILENAME`` once the transfer has finished. A failed or interrupted
    transfer therefore neither truncates an existing local copy nor leaves a
    partial file behind under the final name.

    Parameters
    ----------
    FTP_SERVER : str
        Host name passed to ``ftplib.FTP``.
    FTP_PATH : str
        Remote directory to change into before retrieving the file.
    FILENAME : str
        Used both as the remote file name in the ``RETR`` command and as the
        local destination path.

    Raises
    ------
    Exception
        Any error raised by ``ftplib`` or by local file I/O is propagated
        unchanged after the temporary ``.part`` file has been removed.
    """
    import os  # local import: the notebook's import cell does not provide os

    partial_name = FILENAME + '.part'
    try:
        with ftplib.FTP(FTP_SERVER) as ftp:
            ftp.login()
            ftp.cwd(FTP_PATH)
            with open(partial_name, 'wb') as f:
                ftp.retrbinary('RETR ' + FILENAME, f.write)
        # Reached only after a complete transfer: promote the temporary file.
        os.replace(partial_name, FILENAME)
    finally:
        # After a successful replace the temporary file no longer exists, so
        # this only cleans up after a failure.
        if os.path.exists(partial_name):
            os.remove(partial_name)
In [3]:
def get_station_ID(station_to_find, filename):
    """Return the leading token of the first line that mentions a station.

    Scans ``filename`` line by line. For the first line that contains
    ``station_to_find`` as a substring (case-sensitive), returns the text
    that precedes the first space character on that line.

    Parameters
    ----------
    station_to_find : str
        Substring to search for in each line.
    filename : str
        Path of the text file to scan.

    Returns
    -------
    str or None
        The text before the first space of the first matching line, or
        ``None`` when no line contains ``station_to_find``.
    """
    # Context manager so the file handle is closed even on the early return.
    with open(filename) as stations_file:
        for line in stations_file:
            if station_to_find in line:
                return line.split(" ", 1)[0]
    return None


# warning, it is slow, download it only once
download_file_from_ftp("ftp.ncdc.noaa.gov", "/pub/data/ghcn/daily", "ghcnd-stations.txt")

station_to_find = "GUANGZHOU"  # USE CAPS
station_ID = get_station_ID(station_to_find, "ghcnd-stations.txt")
In [4]:
# get_station_ID returns None when no line matched. Fail here with a clear
# message instead of an opaque TypeError from the string concatenation below.
if station_ID is None:
    raise ValueError("No station ID found for " + repr(station_to_find))

# The station's data file is named after its ID with a '.dly' extension.
weather_data_filename = station_ID + '.dly'

# warning, it is slow, download it only once
download_file_from_ftp("ftp.ncdc.noaa.gov", "/pub/data/ghcn/daily/all", weather_data_filename)
In [7]:
# Parse the downloaded .dly file into a DataFrame.
# NOTE(review): convert_dly_to_dataframe is defined in a cell that is not
# visible here — confirm it has been run before this cell.
df = convert_dly_to_dataframe(weather_data_filename)

# Preview the first five rows (the default for head()).
df.head(5)
Out[7]:
YEAR
MONTH
ELEMENT
VALUE1
VALUE2
VALUE3
VALUE4
VALUE5
VALUE6
VALUE7
...
VALUE22
VALUE23
VALUE24
VALUE25
VALUE26
VALUE27
VALUE28
VALUE29
VALUE30
VALUE31
0
1945
11
TAVG
NaN
NaN
NaN
NaN
NaN
NaN
NaN
...
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
107.0
NaN
1
1945
12
TAVG
123.0
136.0
152.0
144.0
146.0
189.0
219.0
...
179.0
146.0
128.0
107.0
104.0
112.0
122.0
127.0
129.0
156.0
2
1946
1
TAVG
150.0
150.0
123.0
117.0
112.0
121.0
125.0
...
146.0
153.0
173.0
196.0
211.0
212.0
218.0
201.0
156.0
131.0
3
1946
2
TAVG
114.0
112.0
147.0
181.0
195.0
192.0
149.0
...
201.0
196.0
231.0
226.0
221.0
229.0
240.0
NaN
NaN
NaN
4
1946
3
TAVG
237.0
162.0
142.0
133.0
183.0
187.0
160.0
...
183.0
192.0
205.0
216.0
223.0
238.0
207.0
195.0
233.0
228.0