# numpy supplies the random sample data; pandas supplies the time-series tools.
import numpy as np
import pandas as pd
from pandas import DataFrame, Series
# Build a daily DatetimeIndex; it is used as the index of the Series below.
t = pd.date_range('2019-01-01', '2019-12-29')
t
DatetimeIndex(['2019-01-01', '2019-01-02', '2019-01-03', '2019-01-04', '2019-01-05', '2019-01-06', '2019-01-07', '2019-01-08', '2019-01-09', '2019-01-10', ... '2019-12-20', '2019-12-21', '2019-12-22', '2019-12-23', '2019-12-24', '2019-12-25', '2019-12-26', '2019-12-27', '2019-12-28', '2019-12-29'], dtype='datetime64[ns]', length=363, freq='D')
# One random (standard-normal) value per day, indexed by the daily timestamps.
s1 = Series(np.random.randn(len(t)), index=t)
s1
2019-01-01 -0.951227 2019-01-02 0.761531 2019-01-03 0.146341 2019-01-04 0.249194 2019-01-05 -0.437687 ... 2019-12-25 0.169545 2019-12-26 3.220068 2019-12-27 1.515246 2019-12-28 -0.622776 2019-12-29 0.609221 Freq: D, Length: 363, dtype: float64
# Sample the data by month.
# Pick a single month: a 'YYYY-MM' string selects every row of that month.
s1['2019-01']
2019-01-01 -0.951227 2019-01-02 0.761531 2019-01-03 0.146341 2019-01-04 0.249194 2019-01-05 -0.437687 2019-01-06 1.186244 2019-01-07 0.974844 2019-01-08 0.521287 2019-01-09 1.715429 2019-01-10 2.260809 2019-01-11 0.758895 2019-01-12 -1.564395 2019-01-13 -0.505064 2019-01-14 -0.585892 2019-01-15 0.055110 2019-01-16 -0.610403 2019-01-17 0.525841 2019-01-18 -1.189281 2019-01-19 -2.111323 2019-01-20 0.326681 2019-01-21 0.157808 2019-01-22 -1.162134 2019-01-23 0.230476 2019-01-24 1.347033 2019-01-25 0.909771 2019-01-26 -0.033264 2019-01-27 -0.307241 2019-01-28 -1.847770 2019-01-29 -0.222650 2019-01-30 1.248396 2019-01-31 -0.051844 Freq: D, dtype: float64
# Mean of one month. (These per-month values could be collected into a new
# Series by hand, but pandas provides a more convenient way - see resample.)
s1['2019-01'].mean()
0.05791979036590383
# pandas has built-in time resampling (daily data -> monthly data).
# A MonthEnd offset object is the same frequency as the 'M' string alias,
# which pandas >= 2.2 deprecates in favour of 'ME'.
s_m1 = s1.resample(pd.offsets.MonthEnd()).mean()
s_m1
2019-01-31 0.057920 2019-02-28 0.146369 2019-03-31 0.010041 2019-04-30 0.000835 2019-05-31 -0.125909 2019-06-30 0.159881 2019-07-31 0.189943 2019-08-31 -0.337287 2019-09-30 0.005125 2019-10-31 -0.132957 2019-11-30 0.076836 2019-12-31 0.203451 Freq: M, dtype: float64
# Upsample (daily -> hourly). resample offers several ways to fill the new
# slots; ffill propagates the previous value forward.
# 'h' is the hourly alias; the uppercase 'H' is deprecated in pandas >= 2.2.
s1.resample('h').ffill()
2019-01-01 00:00:00 -0.951227 2019-01-01 01:00:00 -0.951227 2019-01-01 02:00:00 -0.951227 2019-01-01 03:00:00 -0.951227 2019-01-01 04:00:00 -0.951227 ... 2019-12-28 20:00:00 -0.622776 2019-12-28 21:00:00 -0.622776 2019-12-28 22:00:00 -0.622776 2019-12-28 23:00:00 -0.622776 2019-12-29 00:00:00 0.609221 Freq: H, Length: 8689, dtype: float64
# Backward fill: each new slot takes the next known value, so the value at
# 2019-01-01 01:00 comes from 2019-01-02.
# 'h' is the hourly alias; the uppercase 'H' is deprecated in pandas >= 2.2.
s1.resample('h').bfill()
2019-01-01 00:00:00 -0.951227 2019-01-01 01:00:00 0.761531 2019-01-01 02:00:00 0.761531 2019-01-01 03:00:00 0.761531 2019-01-01 04:00:00 0.761531 ... 2019-12-28 20:00:00 0.609221 2019-12-28 21:00:00 0.609221 2019-12-28 22:00:00 0.609221 2019-12-28 23:00:00 0.609221 2019-12-29 00:00:00 0.609221 Freq: H, Length: 8689, dtype: float64
模擬構建時間序列圖
# Build an hourly index over the year; it becomes the index of the DataFrame.
# 'h' is the hourly alias; the uppercase 'H' is deprecated in pandas >= 2.2.
t2 = pd.date_range('2019-01-01', '2019-12-29', freq='h')
t2
DatetimeIndex(['2019-01-01 00:00:00', '2019-01-01 01:00:00', '2019-01-01 02:00:00', '2019-01-01 03:00:00', '2019-01-01 04:00:00', '2019-01-01 05:00:00', '2019-01-01 06:00:00', '2019-01-01 07:00:00', '2019-01-01 08:00:00', '2019-01-01 09:00:00', ... '2019-12-28 15:00:00', '2019-12-28 16:00:00', '2019-12-28 17:00:00', '2019-12-28 18:00:00', '2019-12-28 19:00:00', '2019-12-28 20:00:00', '2019-12-28 21:00:00', '2019-12-28 22:00:00', '2019-12-28 23:00:00', '2019-12-29 00:00:00'], dtype='datetime64[ns]', length=8689, freq='H')
# Start from a DataFrame with no columns, indexed by the hourly timestamps.
df = DataFrame(index=t2)
df
2019-01-01 00:00:00 |
2019-01-01 01:00:00 |
2019-01-01 02:00:00 |
2019-01-01 03:00:00 |
2019-01-01 04:00:00 |
... |
2019-12-28 20:00:00 |
2019-12-28 21:00:00 |
2019-12-28 22:00:00 |
2019-12-28 23:00:00 |
2019-12-29 00:00:00 |
8689 rows × 0 columns
# Insert simulated stock data for company "AL": one random integer in
# [80, 160) per hourly timestamp.
df['AL'] = np.random.randint(80, 160, size=len(t2))
df
| | AL |
---|---|
2019-01-01 00:00:00 | 116 |
2019-01-01 01:00:00 | 102 |
2019-01-01 02:00:00 | 124 |
2019-01-01 03:00:00 | 81 |
2019-01-01 04:00:00 | 152 |
... | ... |
2019-12-28 20:00:00 | 114 |
2019-12-28 21:00:00 | 91 |
2019-12-28 22:00:00 | 89 |
2019-12-28 23:00:00 | 159 |
2019-12-29 00:00:00 | 133 |
8689 rows × 1 columns
# Add a second simulated stock, "TC": one random integer in [30, 50) per
# hourly timestamp.
df['TC'] = np.random.randint(30, 50, size=len(t2))
df
| | AL | TC |
---|---|---|
2019-01-01 00:00:00 | 116 | 40 |
2019-01-01 01:00:00 | 102 | 43 |
2019-01-01 02:00:00 | 124 | 33 |
2019-01-01 03:00:00 | 81 | 46 |
2019-01-01 04:00:00 | 152 | 49 |
... | ... | ... |
2019-12-28 20:00:00 | 114 | 44 |
2019-12-28 21:00:00 | 91 | 33 |
2019-12-28 22:00:00 | 89 | 44 |
2019-12-28 23:00:00 | 159 | 35 |
2019-12-29 00:00:00 | 133 | 36 |
8689 rows × 2 columns
# Draw the columns of df as lines against the time index
# ('line' is the default plot kind, spelled out here).
df.plot(kind='line')
<matplotlib.axes._subplots.AxesSubplot at 0x128151110>
# Display the figure.
import matplotlib.pyplot as plt

plt.show()
# Switch to weekly sampling: average the hourly values of both stocks over
# each week in a single resample call.
week_df = df[['AL', 'TC']].resample('W').mean()
week_df
| | AL | TC |
---|---|---|
2019-01-06 | 119.333333 | 39.958333 |
2019-01-13 | 121.142857 | 39.988095 |
2019-01-20 | 119.053571 | 38.898810 |
2019-01-27 | 120.613095 | 38.339286 |
2019-02-03 | 118.833333 | 39.625000 |
2019-02-10 | 115.517857 | 39.392857 |
2019-02-17 | 120.738095 | 38.755952 |
2019-02-24 | 119.440476 | 38.809524 |
2019-03-03 | 122.345238 | 39.470238 |
2019-03-10 | 121.827381 | 39.738095 |
2019-03-17 | 120.660714 | 39.226190 |
2019-03-24 | 118.303571 | 39.011905 |
2019-03-31 | 119.047619 | 39.672619 |
2019-04-07 | 121.892857 | 38.654762 |
2019-04-14 | 120.613095 | 38.761905 |
2019-04-21 | 118.327381 | 38.791667 |
2019-04-28 | 119.023810 | 40.261905 |
2019-05-05 | 121.297619 | 39.392857 |
2019-05-12 | 120.130952 | 39.238095 |
2019-05-19 | 121.577381 | 39.696429 |
2019-05-26 | 120.148810 | 40.107143 |
2019-06-02 | 118.940476 | 39.458333 |
2019-06-09 | 119.821429 | 39.607143 |
2019-06-16 | 116.351190 | 39.386905 |
2019-06-23 | 118.755952 | 39.619048 |
2019-06-30 | 117.404762 | 39.142857 |
2019-07-07 | 119.898810 | 39.934524 |
2019-07-14 | 118.125000 | 39.559524 |
2019-07-21 | 117.690476 | 39.255952 |
2019-07-28 | 119.113095 | 39.279762 |
2019-08-04 | 118.696429 | 39.357143 |
2019-08-11 | 119.642857 | 39.208333 |
2019-08-18 | 121.511905 | 39.863095 |
2019-08-25 | 117.261905 | 40.125000 |
2019-09-01 | 123.285714 | 40.404762 |
2019-09-08 | 118.470238 | 39.940476 |
2019-09-15 | 116.636905 | 39.107143 |
2019-09-22 | 116.702381 | 39.226190 |
2019-09-29 | 121.327381 | 40.297619 |
2019-10-06 | 120.833333 | 39.410714 |
2019-10-13 | 119.505952 | 38.982143 |
2019-10-20 | 119.946429 | 41.017857 |
2019-10-27 | 118.988095 | 39.482143 |
2019-11-03 | 117.994048 | 39.440476 |
2019-11-10 | 115.077381 | 39.803571 |
2019-11-17 | 119.732143 | 40.238095 |
2019-11-24 | 116.035714 | 38.815476 |
2019-12-01 | 118.250000 | 39.910714 |
2019-12-08 | 120.005952 | 39.434524 |
2019-12-15 | 121.797619 | 40.261905 |
2019-12-22 | 120.553571 | 39.702381 |
2019-12-29 | 121.703448 | 39.724138 |
# Draw the weekly means as lines against the time index
# ('line' is the default plot kind, spelled out here).
week_df.plot(kind='line')
<matplotlib.axes._subplots.AxesSubplot at 0x127e82f90>
本文同步分享在 博客「瑞 新」(CSDN)。
若有侵權,請聯繫 support@oschina.cn 刪除。
本文參與「OSC源創計劃」,歡迎正在閱讀的你也加入,一塊兒分享。