數據挖掘:Pandas時間模塊管理!

datetime

import datetime

import numpy as np
import pandas as pd

# datetime.date: a calendar date with no time-of-day component.

t = datetime.date.today()  # today() is a classmethod of datetime.date
print(t, type(t))
print("")

# Convert the date to a string and show the converted value with its type.
t_str = str(t)
print(t_str, type(t_str))

2019-05-26 <class 'datetime.date'>

2019-05-26 <class 'str'>

# datetime.datetime: a date together with a time of day.

now = datetime.datetime.now()
print(now, type(now))

2019-05-26 16:18:17.612845 <class 'datetime.datetime'>

# datetime.timedelta: a duration that can be added to a datetime.

t1 = datetime.datetime(2017, 10, 1)
print(t1)
print("")

# Signature: timedelta(days=0, seconds=0, microseconds=0, milliseconds=0,
# minutes=0, hours=0, weeks=0). Naming the unit makes the 100 unambiguous.
tx = datetime.timedelta(days=100)
print(tx)
print("")

t2 = t1 + tx
print(t2)

2017-10-01 00:00:00

100 days, 0:00:00

2018-01-09 00:00:00

pd.Timestamp 跟 datetime 作用相同,只不過 Timestamp 是 pandas 模塊裏的

import datetime

import numpy as np
import pandas as pd

# Build a Timestamp from an existing datetime object.
t = datetime.datetime.today()
print(t)
print("")

ts1 = pd.Timestamp(t)
print(ts1)
print("")

# Build a Timestamp by parsing a compact YYYYMMDD string.
ts2 = pd.Timestamp("20171021")
print(ts2)

2020-06-01 14:28:08.656056

2020-06-01 14:28:08.656056

2017-10-21 00:00:00

pd.to_datetime 多個時間數據轉換成時間戳索引

# Parse several compact date strings in one call; the result is a DatetimeIndex.
time_list = [
    "20171019",
    "20181020",
    "20191021",
]
t = pd.to_datetime(time_list)
print(t, type(t))

DatetimeIndex(['2017-10-19', '2018-10-20', '2019-10-21'], dtype='datetime64[ns]', freq=None) <class 'pandas.core.indexes.datetimes.DatetimeIndex'>

如果時間序列裏包含非時間的數據:ignore(忽略異常,但不會轉換成時間戳索引);coerce(把異常值改成NaT)

time_list1 = ["20171019", "20181020", "bbbb", "20191021"]

# errors="ignore" is deprecated in recent pandas releases. Catching the parse
# failure and falling back to the untouched values gives the same result: the
# input comes back as a plain Index that was not converted to timestamps.
try:
    t1 = pd.to_datetime(time_list1)
except (ValueError, TypeError):
    t1 = pd.Index(time_list1)
print(t1, type(t1))
print("")

# errors="coerce" converts what it can and replaces unparseable entries with NaT.
t2 = pd.to_datetime(time_list1, errors="coerce")
print(t2)

Index(['20171019', '20181020', 'bbbb', '20191021'], dtype='object') <class 'pandas.core.indexes.base.Index'>

DatetimeIndex(['2017-10-19', '2018-10-20', 'NaT', '2019-10-21'], dtype='datetime64[ns]', freq=None)

pd.DatetimeIndex() 直接生成時間戳序列

# Build a DatetimeIndex directly from date strings.
# NOTE(review): the inputs mix formats. Per the recorded output, "11/06/2017"
# was read month-first (2017-11-06) while "26/05/2019" was read day-first
# (2019-05-26). ISO "YYYY-MM-DD" strings would remove that ambiguity, and
# stricter parsing in newer pandas may reject the mix — TODO confirm.
rng = pd.DatetimeIndex(["20160910", "11/06/2017", "20180821", "26/05/2019"])
print(rng)
print(type(rng))
print("")

# Indexing a single position yields a scalar Timestamp.
print(rng[0], type(rng[0]))

DatetimeIndex(['2016-09-10', '2017-11-06', '2018-08-21', '2019-05-26'], dtype='datetime64[ns]', freq=None)
<class 'pandas.core.indexes.datetimes.DatetimeIndex'>

2016-09-10 00:00:00 <class 'pandas._libs.tslibs.timestamps.Timestamp'>

# Use the timestamp index as the Series index, one random value per entry.
st = pd.Series(data=np.random.rand(4), index=rng)
print(st)

2016-09-10   0.835586
2017-11-06   0.223044
2018-08-21   0.950717
2019-05-26   0.013370
dtype: float64

pd.date_range() 生成日期範圍

"""
pd.date_range(start=None, end=None, periods=None, freq=None, tz=None, normalize=False, name=None ,closed=None, **kwargs)

該函數主要用於生成一個固定頻率的時間索引,在調用構造方法時,必須指定start、end、periods中的兩個參數值,否則報錯。

start: 開始日期

end: 結束日期

periods:固定時期,取值爲整數或None

freq:日期偏移量,取值爲string或DateOffset,默認爲'D'

normalize:若參數爲True表示將start、end參數值正則化到午夜時間戳 0:00:00 默認爲False

name:生成時間索引對象的名稱,取值爲string或None

closed:可以理解成在closed=None情況下返回的結果基礎上再做篩選:若closed='left'表示取左閉右開的結果(包含start,不包含end),若closed='right'表示取左開右閉的結果(不包含start,包含end)

"""

"\npd.date_range(start=None, end=None, periods=None, freq=None, tz=None, normalize=False, name=None ,closed=None, **kwargs)\n\n該函數主要用於生成一個固定頻率的時間索引,在調用構造方法時,必須指定start、end、periods中的兩個參數值,否則報錯。\n\nstart: 開始日期\n\nend: 結束日期\n\nperiods:固定時期,取值爲整數或None\n\nfreq:日期偏移量,取值爲string或DateOffset,默認爲'D'\n\nnormalize:若參數爲True表示將start、end參數值正則化到午夜時間戳 0:00:00 默認爲False\n\nname:生成時間索引對象的名稱,取值爲string或None\n\nclosed:可以理解成在closed=None情況下返回的結果基礎上再做篩選:若closed='left'表示取左閉右開的結果(包含start,不包含end),若closed='right'表示取左開右閉的結果(不包含start,包含end)\n\n"

start end

# Bound the range by both endpoints; `name` labels the resulting index.
t_index1 = pd.date_range("20181018", "20191021", name="t_index1")
print(t_index1)

DatetimeIndex(['2018-10-18', '2018-10-19', '2018-10-20', '2018-10-21',
'2018-10-22', '2018-10-23', '2018-10-24', '2018-10-25',
'2018-10-26', '2018-10-27',
...
'2019-10-12', '2019-10-13', '2019-10-14', '2019-10-15',
'2019-10-16', '2019-10-17', '2019-10-18', '2019-10-19',
'2019-10-20', '2019-10-21'],
dtype='datetime64[ns]', name='t_index1', length=369, freq='D')

periods

# Ten daily entries counted forwards from the start date.
t_index2 = pd.date_range("20181018", periods=10, name="t_index2")
print(t_index2)

DatetimeIndex(['2018-10-18', '2018-10-19', '2018-10-20', '2018-10-21',
'2018-10-22', '2018-10-23', '2018-10-24', '2018-10-25',
'2018-10-26', '2018-10-27'],
dtype='datetime64[ns]', name='t_index2', freq='D')

# Ten daily entries counted backwards so that the last one is the end date.
t_index3 = pd.date_range(periods=10, end="20181018", name="t_index3")
print(t_index3)

DatetimeIndex(['2018-10-09', '2018-10-10', '2018-10-11', '2018-10-12',
'2018-10-13', '2018-10-14', '2018-10-15', '2018-10-16',
'2018-10-17', '2018-10-18'],
dtype='datetime64[ns]', name='t_index3', freq='D')

name normalize

# Without normalisation every entry keeps the 16:30 time of day of `start`.
t_index4 = pd.date_range("11/09/2019 16:30", periods=10, name="t_index4")
print(t_index4, "\n", sep="\n")

# normalize=True snaps the start to midnight before the range is generated.
t_index4 = pd.date_range(
    "11/09/2019 16:30", periods=10, normalize=True, name="t_index4"
)
print(t_index4)

DatetimeIndex(['2019-11-09 16:30:00', '2019-11-10 16:30:00',
'2019-11-11 16:30:00', '2019-11-12 16:30:00',
'2019-11-13 16:30:00', '2019-11-14 16:30:00',
'2019-11-15 16:30:00', '2019-11-16 16:30:00',
'2019-11-17 16:30:00', '2019-11-18 16:30:00'],
dtype='datetime64[ns]', name='t_index4', freq='D')

DatetimeIndex(['2019-11-09', '2019-11-10', '2019-11-11', '2019-11-12',
'2019-11-13', '2019-11-14', '2019-11-15', '2019-11-16',
'2019-11-17', '2019-11-18'],
dtype='datetime64[ns]', name='t_index4', freq='D')

closed

# Default: both endpoints are part of the result.
t_index5 = pd.date_range(start="20190910", end="20190918", name="t_index5")
print(t_index5)
print("\n")

# The `closed` keyword was replaced by `inclusive` in newer pandas releases.
# inclusive="left": keep the start, drop the end (left-closed, right-open).
t_index5 = pd.date_range(start="20190910", end="20190918", name="t_index5", inclusive="left")
print(t_index5)
print("\n")

# inclusive="right": drop the start, keep the end (left-open, right-closed).
t_index5 = pd.date_range(start="20190910", end="20190918", name="t_index5", inclusive="right")
print(t_index5)
print("\n")

DatetimeIndex(['2019-09-10', '2019-09-11', '2019-09-12', '2019-09-13',
'2019-09-14', '2019-09-15', '2019-09-16', '2019-09-17',
'2019-09-18'],
dtype='datetime64[ns]', name='t_index5', freq='D')

DatetimeIndex(['2019-09-10', '2019-09-11', '2019-09-12', '2019-09-13',
'2019-09-14', '2019-09-15', '2019-09-16', '2019-09-17'],
dtype='datetime64[ns]', name='t_index5', freq='D')

DatetimeIndex(['2019-09-11', '2019-09-12', '2019-09-13', '2019-09-14',
'2019-09-15', '2019-09-16', '2019-09-17', '2019-09-18'],
dtype='datetime64[ns]', name='t_index5', freq='D')

pd.bdate_range() 默認頻率爲工作日

# bdate_range steps through business days, so the weekend is skipped.
t_index6 = pd.bdate_range("20191001", "20191007", name="t_index6")
print(t_index6)

DatetimeIndex(['2019-10-01', '2019-10-02', '2019-10-03', '2019-10-04',
'2019-10-07'],
dtype='datetime64[ns]', name='t_index6', freq='B')

pd.date_range 轉換成list 元素爲時間戳Timestamp

# First show the range as a DatetimeIndex ...
t_index7_list = pd.date_range("20191001", "20191007", name="t_index7_list")
print(t_index7_list, "\n", sep="\n")

# ... then materialise it as a plain list whose elements are Timestamp objects.
t_index7_list = list(
    pd.date_range("20191001", "20191007", name="t_index7_list")
)
print(t_index7_list)

DatetimeIndex(['2019-10-01', '2019-10-02', '2019-10-03', '2019-10-04',
'2019-10-05', '2019-10-06', '2019-10-07'],
dtype='datetime64[ns]', name='t_index7_list', freq='D')

[Timestamp('2019-10-01 00:00:00', freq='D'), Timestamp('2019-10-02 00:00:00', freq='D'), Timestamp('2019-10-03 00:00:00', freq='D'), Timestamp('2019-10-04 00:00:00', freq='D'), Timestamp('2019-10-05 00:00:00', freq='D'), Timestamp('2019-10-06 00:00:00', freq='D'), Timestamp('2019-10-07 00:00:00', freq='D')]

freq 日期偏移量

# With no freq given the step is "D": one entry per calendar day.

pd.date_range(start="10/1/2019", end="2019/10/7")

DatetimeIndex(['2019-10-01', '2019-10-02', '2019-10-03', '2019-10-04',
'2019-10-05', '2019-10-06', '2019-10-07'],
dtype='datetime64[ns]', freq='D')

# "B": one entry per business day.

pd.date_range(start="10/01/2019", end="10/07/2019", freq="B")

DatetimeIndex(['2019-10-01', '2019-10-02', '2019-10-03', '2019-10-04',
'2019-10-07'],
dtype='datetime64[ns]', freq='B')

# "h": one entry per hour. The upper-case "H" spelling is deprecated in recent
# pandas; lower-case "h" is accepted by old and new releases alike.

pd.date_range("10/01/2019 12:00:00", "10/02/2019 12:00:00", freq="h")

DatetimeIndex(['2019-10-01 12:00:00', '2019-10-01 13:00:00',
'2019-10-01 14:00:00', '2019-10-01 15:00:00',
'2019-10-01 16:00:00', '2019-10-01 17:00:00',
'2019-10-01 18:00:00', '2019-10-01 19:00:00',
'2019-10-01 20:00:00', '2019-10-01 21:00:00',
'2019-10-01 22:00:00', '2019-10-01 23:00:00',
'2019-10-02 00:00:00', '2019-10-02 01:00:00',
'2019-10-02 02:00:00', '2019-10-02 03:00:00',
'2019-10-02 04:00:00', '2019-10-02 05:00:00',
'2019-10-02 06:00:00', '2019-10-02 07:00:00',
'2019-10-02 08:00:00', '2019-10-02 09:00:00',
'2019-10-02 10:00:00', '2019-10-02 11:00:00',
'2019-10-02 12:00:00'],
dtype='datetime64[ns]', freq='H')

# "min": one entry per minute. The single-letter "T" alias is deprecated in
# recent pandas; "min" is accepted by old and new releases alike.

pd.date_range("10/01/2019 12:10:00", "10/01/2019 12:30:00", freq="min")

DatetimeIndex(['2019-10-01 12:10:00', '2019-10-01 12:11:00',
'2019-10-01 12:12:00', '2019-10-01 12:13:00',
'2019-10-01 12:14:00', '2019-10-01 12:15:00',
'2019-10-01 12:16:00', '2019-10-01 12:17:00',
'2019-10-01 12:18:00', '2019-10-01 12:19:00',
'2019-10-01 12:20:00', '2019-10-01 12:21:00',
'2019-10-01 12:22:00', '2019-10-01 12:23:00',
'2019-10-01 12:24:00', '2019-10-01 12:25:00',
'2019-10-01 12:26:00', '2019-10-01 12:27:00',
'2019-10-01 12:28:00', '2019-10-01 12:29:00',
'2019-10-01 12:30:00'],
dtype='datetime64[ns]', freq='T')

# "s": one entry per second. The upper-case "S" spelling is deprecated in
# recent pandas; lower-case "s" is accepted by old and new releases alike.

pd.date_range("10/01/2019", "10/01/2019 00:00:30", freq="s")

DatetimeIndex(['2019-10-01 00:00:00', '2019-10-01 00:00:01',
'2019-10-01 00:00:02', '2019-10-01 00:00:03',
'2019-10-01 00:00:04', '2019-10-01 00:00:05',
'2019-10-01 00:00:06', '2019-10-01 00:00:07',
'2019-10-01 00:00:08', '2019-10-01 00:00:09',
'2019-10-01 00:00:10', '2019-10-01 00:00:11',
'2019-10-01 00:00:12', '2019-10-01 00:00:13',
'2019-10-01 00:00:14', '2019-10-01 00:00:15',
'2019-10-01 00:00:16', '2019-10-01 00:00:17',
'2019-10-01 00:00:18', '2019-10-01 00:00:19',
'2019-10-01 00:00:20', '2019-10-01 00:00:21',
'2019-10-01 00:00:22', '2019-10-01 00:00:23',
'2019-10-01 00:00:24', '2019-10-01 00:00:25',
'2019-10-01 00:00:26', '2019-10-01 00:00:27',
'2019-10-01 00:00:28', '2019-10-01 00:00:29',
'2019-10-01 00:00:30'],
dtype='datetime64[ns]', freq='S')

# "ms": one entry per millisecond (a thousandth of a second). The "L" alias is
# deprecated in recent pandas; "ms" is accepted by old and new releases alike.

pd.date_range("10/01/2019", "10/01/2019 00:00:30", freq="ms")

DatetimeIndex([       '2019-10-01 00:00:00', '2019-10-01 00:00:00.001000',
'2019-10-01 00:00:00.002000', '2019-10-01 00:00:00.003000',
'2019-10-01 00:00:00.004000', '2019-10-01 00:00:00.005000',
'2019-10-01 00:00:00.006000', '2019-10-01 00:00:00.007000',
'2019-10-01 00:00:00.008000', '2019-10-01 00:00:00.009000',
...
'2019-10-01 00:00:29.991000', '2019-10-01 00:00:29.992000',
'2019-10-01 00:00:29.993000', '2019-10-01 00:00:29.994000',
'2019-10-01 00:00:29.995000', '2019-10-01 00:00:29.996000',
'2019-10-01 00:00:29.997000', '2019-10-01 00:00:29.998000',
'2019-10-01 00:00:29.999000',       '2019-10-01 00:00:30'],
dtype='datetime64[ns]', length=30001, freq='L')

# "us": one entry per microsecond (a millionth of a second). The "U" alias is
# deprecated in recent pandas; "us" is accepted by old and new releases alike.
# Note that this range holds 30,000,001 entries.

pd.date_range("10/01/2019", "10/01/2019 00:00:30", freq="us")

DatetimeIndex([       '2019-10-01 00:00:00', '2019-10-01 00:00:00.000001',
               '2019-10-01 00:00:00.000002', '2019-10-01 00:00:00.000003',
               '2019-10-01 00:00:00.000004', '2019-10-01 00:00:00.000005',
               '2019-10-01 00:00:00.000006', '2019-10-01 00:00:00.000007',
               '2019-10-01 00:00:00.000008', '2019-10-01 00:00:00.000009',
               ...
               '2019-10-01 00:00:29.999991', '2019-10-01 00:00:29.999992',
               '2019-10-01 00:00:29.999993', '2019-10-01 00:00:29.999994',
               '2019-10-01 00:00:29.999995', '2019-10-01 00:00:29.999996',
               '2019-10-01 00:00:29.999997', '2019-10-01 00:00:29.999998',
               '2019-10-01 00:00:29.999999',        '2019-10-01 00:00:30'],
              dtype='datetime64[ns]', length=30000001, freq='U')

星期幾縮寫 -- MON/TUE/WED/THU/FRI/SAT/SUN

# "W-MON": weekly steps anchored on Mondays.

pd.date_range(start="2019/10/1", end="2019/11/1", freq="W-MON")

DatetimeIndex(['2019-10-07', '2019-10-14', '2019-10-21', '2019-10-28'], dtype='datetime64[ns]', freq='W-MON')

# "WOM-2MON": the second Monday of every month, i.e. monthly steps.

pd.date_range(start="2019/10/1", end="2020/10/1", freq="WOM-2MON")

DatetimeIndex(['2019-10-14', '2019-11-11', '2019-12-09', '2020-01-13',
'2020-02-10', '2020-03-09', '2020-04-13', '2020-05-11',
'2020-06-08', '2020-07-13', '2020-08-10', '2020-09-14'],
dtype='datetime64[ns]', freq='WOM-2MON')

# "M": the last calendar day of each month.
# NOTE(review): recent pandas releases are understood to deprecate "M" in
# favour of "ME" — confirm against the installed version before changing it.

pd.date_range("2019", "2020", freq = "M")

DatetimeIndex(['2019-01-31', '2019-02-28', '2019-03-31', '2019-04-30',
'2019-05-31', '2019-06-30', '2019-07-31', '2019-08-31',
'2019-09-30', '2019-10-31', '2019-11-30', '2019-12-31'],
dtype='datetime64[ns]', freq='M')

月份

"""
一月     Jan.     January

二月     Feb.     February

三月     Mar.     March

四月     Apr.     April

五月     May.     May

六月     Jun.     June

七月     Jul.     July

八月     Aug.     August

九月     Sept.   September

十月     Oct.     October

十一月   Nov.     November

十二月   Dec.     December
"""

# "Q-<MON>": the last calendar day of the final month of each quarter, where
# <MON> names a month in which a quarter ends.
# NOTE(review): recent pandas releases are understood to rename "Q" to "QE"
# — confirm against the installed version before changing it.

print(pd.date_range("2019", "2020", freq="Q-JAN"))

print(pd.date_range("2019", "2020", freq="Q-FEB"))

print(pd.date_range("2019", "2020", freq="Q-MAR"))
print("")

# Only three distinct quarter layouts exist (months 1-4-7-10, 2-5-8-11 and
# 3-6-9-12), so "Q-APR" yields the same dates as "Q-JAN".
print(pd.date_range("2019", "2020", freq="Q-APR"))

DatetimeIndex(['2019-01-31', '2019-04-30', '2019-07-31', '2019-10-31'], dtype='datetime64[ns]', freq='Q-JAN')
DatetimeIndex(['2019-02-28', '2019-05-31', '2019-08-31', '2019-11-30'], dtype='datetime64[ns]', freq='Q-FEB')
DatetimeIndex(['2019-03-31', '2019-06-30', '2019-09-30', '2019-12-31'], dtype='datetime64[ns]', freq='Q-MAR')

DatetimeIndex(['2019-01-31', '2019-04-30', '2019-07-31', '2019-10-31'], dtype='datetime64[ns]', freq='Q-APR')

# "A-<MON>": the last calendar day of the given month, once per year.
# NOTE(review): recent pandas releases are understood to rename "A" to "YE"
# — confirm against the installed version before changing it.

print(pd.date_range("2019", "2021", freq="A-JAN"))
print(pd.date_range("2019", "2021", freq="A-FEB"))
print(pd.date_range("2019", "2021", freq="A-DEC"))

DatetimeIndex(['2019-01-31', '2020-01-31'], dtype='datetime64[ns]', freq='A-JAN')
DatetimeIndex(['2019-02-28', '2020-02-29'], dtype='datetime64[ns]', freq='A-FEB')
DatetimeIndex(['2019-12-31', '2020-12-31'], dtype='datetime64[ns]', freq='A-DEC')

# "BM": the last business day of each month.
# NOTE(review): recent pandas releases are understood to rename "BM" to "BME"
# — confirm against the installed version before changing it.

print(pd.date_range("2019", "2020", freq="BM"))

DatetimeIndex(['2019-01-31', '2019-02-28', '2019-03-29', '2019-04-30',
'2019-05-31', '2019-06-28', '2019-07-31', '2019-08-30',
'2019-09-30', '2019-10-31', '2019-11-29', '2019-12-31'],
dtype='datetime64[ns]', freq='BM')

# "BQ-<MON>": the last business day of the final month of each quarter, where
# <MON> names a month in which a quarter ends.
# NOTE(review): recent pandas releases are understood to rename "BQ" to "BQE"
# — confirm against the installed version before changing it.

print(pd.date_range("2019", "2021", freq="BQ-JAN"))
print("")
print(pd.date_range("2019", "2021", freq="BQ-FEB"))
print("")
print(pd.date_range("2019", "2021", freq="BQ-MAR"))
print("")
print(pd.date_range("2019", "2021", freq="BQ-APR"))

DatetimeIndex(['2019-01-31', '2019-04-30', '2019-07-31', '2019-10-31',
'2020-01-31', '2020-04-30', '2020-07-31', '2020-10-30'],
dtype='datetime64[ns]', freq='BQ-JAN')

DatetimeIndex(['2019-02-28', '2019-05-31', '2019-08-30', '2019-11-29',
'2020-02-28', '2020-05-29', '2020-08-31', '2020-11-30'],
dtype='datetime64[ns]', freq='BQ-FEB')

DatetimeIndex(['2019-03-29', '2019-06-28', '2019-09-30', '2019-12-31',
'2020-03-31', '2020-06-30', '2020-09-30', '2020-12-31'],
dtype='datetime64[ns]', freq='BQ-MAR')

DatetimeIndex(['2019-01-31', '2019-04-30', '2019-07-31', '2019-10-31',
'2020-01-31', '2020-04-30', '2020-07-31', '2020-10-30'],
dtype='datetime64[ns]', freq='BQ-APR')

# "BA-<MON>": the last business day of the given month, once per year.
# NOTE(review): recent pandas releases are understood to rename "BA" to "BYE"
# — confirm against the installed version before changing it.

print(pd.date_range("2019", "2021", freq="BA-JAN"))
# This call runs to 2023 rather than 2021, so it yields four entries.
print(pd.date_range("2019", "2023", freq="BA-FEB"))
print(pd.date_range("2019", "2021", freq="BA-MAR"))

DatetimeIndex(['2019-01-31', '2020-01-31'], dtype='datetime64[ns]', freq='BA-JAN')
DatetimeIndex(['2019-02-28', '2020-02-28', '2021-02-26', '2022-02-28'], dtype='datetime64[ns]', freq='BA-FEB')
DatetimeIndex(['2019-03-29', '2020-03-31'], dtype='datetime64[ns]', freq='BA-MAR')

# "MS": the first calendar day of each month.

pd.date_range(start="2019", end="2020", freq="MS")

DatetimeIndex(['2019-01-01', '2019-02-01', '2019-03-01', '2019-04-01',
'2019-05-01', '2019-06-01', '2019-07-01', '2019-08-01',
'2019-09-01', '2019-10-01', '2019-11-01', '2019-12-01',
'2020-01-01'],
dtype='datetime64[ns]', freq='MS')

# "QS-<MON>": the first calendar day of each quarter, where <MON> names a
# month in which a quarter starts. One index per anchor month, separated by
# a blank line.

print(
    *(
        pd.date_range("2019", "2020", freq="QS-" + anchor)
        for anchor in ("JAN", "FEB", "MAR", "APR")
    ),
    sep="\n\n",
)

DatetimeIndex(['2019-01-01', '2019-04-01', '2019-07-01', '2019-10-01',
'2020-01-01'],
dtype='datetime64[ns]', freq='QS-JAN')

DatetimeIndex(['2019-02-01', '2019-05-01', '2019-08-01', '2019-11-01'], dtype='datetime64[ns]', freq='QS-FEB')

DatetimeIndex(['2019-03-01', '2019-06-01', '2019-09-01', '2019-12-01'], dtype='datetime64[ns]', freq='QS-MAR')

DatetimeIndex(['2019-01-01', '2019-04-01', '2019-07-01', '2019-10-01',
'2020-01-01'],
dtype='datetime64[ns]', freq='QS-APR')

# "AS-<MON>": the first calendar day of the given month, once per year.
# NOTE(review): recent pandas releases are understood to rename "AS" to "YS"
# — confirm against the installed version before changing it.

print(pd.date_range("2019", "2021", freq="AS-JAN"))
print(pd.date_range("2019", "2021", freq="AS-FEB"))
print(pd.date_range("2019", "2021", freq="AS-DEC"))

DatetimeIndex(['2019-01-01', '2020-01-01', '2021-01-01'], dtype='datetime64[ns]', freq='AS-JAN')
DatetimeIndex(['2019-02-01', '2020-02-01'], dtype='datetime64[ns]', freq='AS-FEB')
DatetimeIndex(['2019-12-01', '2020-12-01'], dtype='datetime64[ns]', freq='AS-DEC')

# "BMS": the first business day of each month.

print(pd.date_range(start="2019", end="2021", freq="BMS"))

DatetimeIndex(['2019-01-01', '2019-02-01', '2019-03-01', '2019-04-01',
'2019-05-01', '2019-06-03', '2019-07-01', '2019-08-01',
'2019-09-02', '2019-10-01', '2019-11-01', '2019-12-02',
'2020-01-01', '2020-02-03', '2020-03-02', '2020-04-01',
'2020-05-01', '2020-06-01', '2020-07-01', '2020-08-03',
'2020-09-01', '2020-10-01', '2020-11-02', '2020-12-01',
'2021-01-01'],
dtype='datetime64[ns]', freq='BMS')

# "BQS-<MON>": the first business day of each quarter, where <MON> names a
# month in which a quarter starts. One index per anchor month, separated by
# a blank line.

print(
    *(
        pd.date_range("2019", "2020", freq="BQS-" + anchor)
        for anchor in ("JAN", "FEB", "MAR", "APR")
    ),
    sep="\n\n",
)

DatetimeIndex(['2019-01-01', '2019-04-01', '2019-07-01', '2019-10-01',
'2020-01-01'],
dtype='datetime64[ns]', freq='BQS-JAN')

DatetimeIndex(['2019-02-01', '2019-05-01', '2019-08-01', '2019-11-01'], dtype='datetime64[ns]', freq='BQS-FEB')

DatetimeIndex(['2019-03-01', '2019-06-03', '2019-09-02', '2019-12-02'], dtype='datetime64[ns]', freq='BQS-MAR')

DatetimeIndex(['2019-01-01', '2019-04-01', '2019-07-01', '2019-10-01',
'2020-01-01'],
dtype='datetime64[ns]', freq='BQS-APR')

# "BAS-<MON>": the first business day of the given month, once per year.
# NOTE(review): recent pandas releases are understood to rename "BAS" to "BYS"
# — confirm against the installed version before changing it.

print(pd.date_range("2019", "2021", freq="BAS-JAN"))
print(pd.date_range("2019", "2021", freq="BAS-FEB"))
print(pd.date_range("2019", "2021", freq="BAS-DEC"))

DatetimeIndex(['2019-01-01', '2020-01-01', '2021-01-01'], dtype='datetime64[ns]', freq='BAS-JAN')
DatetimeIndex(['2019-02-01', '2020-02-03'], dtype='datetime64[ns]', freq='BAS-FEB')
DatetimeIndex(['2019-12-02', '2020-12-01'], dtype='datetime64[ns]', freq='BAS-DEC')

複合頻率

# "7D": a multiplier in front of the alias gives a step of seven days.

pd.date_range(start="2019/10/1", end="2019/12/1", freq="7D")

DatetimeIndex(['2019-10-01', '2019-10-08', '2019-10-15', '2019-10-22',
'2019-10-29', '2019-11-05', '2019-11-12', '2019-11-19',
'2019-11-26'],
dtype='datetime64[ns]', freq='7D')

# "2h30min": aliases can be chained, giving a step of 2 hours 30 minutes.

pd.date_range(start="2019/10/1 00:00:00", end="2019/10/1 12:00:00", freq="2h30min")

DatetimeIndex(['2019-10-01 00:00:00', '2019-10-01 02:30:00',
'2019-10-01 05:00:00', '2019-10-01 07:30:00',
'2019-10-01 10:00:00'],
dtype='datetime64[ns]', freq='150T')

# "2M": the last calendar day of every second month.
# NOTE(review): recent pandas releases are understood to rename "M" to "ME"
# (so "2ME" here) — confirm against the installed version before changing it.

pd.date_range("2019", "2021", freq="2M")

DatetimeIndex(['2019-01-31', '2019-03-31', '2019-05-31', '2019-07-31',
'2019-09-30', '2019-11-30', '2020-01-31', '2020-03-31',
'2020-05-31', '2020-07-31', '2020-09-30', '2020-11-30'],
dtype='datetime64[ns]', freq='2M')

asfreq 時間頻率轉換

# Four daily observations with random values.
ts = pd.Series(np.random.rand(4), index=pd.date_range("2019/1/1", "2019/1/4"))
print(ts)
print("\n")

# Convert the daily series to a 4-hourly one; the newly created slots hold NaN.
# Lower-case "h" is used because the upper-case "H" alias is deprecated in
# recent pandas, while "h" is accepted by old and new releases alike.
print(ts.asfreq("4h"))
print("\n")

# `method` chooses how the new slots are filled:
# "ffill" carries the previous observation forward ...
print(ts.asfreq("4h", method="ffill"))
print("\n")

# ... and "bfill" pulls the next observation backward.
print(ts.asfreq("4h", method="bfill"))

2019-01-01   0.610403
2019-01-02   0.416557
2019-01-03   0.821631
2019-01-04   0.699457
Freq: D, dtype: float64

2019-01-01 00:00:00   0.610403
2019-01-01 04:00:00         NaN
2019-01-01 08:00:00         NaN
2019-01-01 12:00:00         NaN
2019-01-01 16:00:00         NaN
2019-01-01 20:00:00         NaN
2019-01-02 00:00:00   0.416557
2019-01-02 04:00:00         NaN
2019-01-02 08:00:00         NaN
2019-01-02 12:00:00         NaN
2019-01-02 16:00:00         NaN
2019-01-02 20:00:00         NaN
2019-01-03 00:00:00   0.821631
2019-01-03 04:00:00         NaN
2019-01-03 08:00:00         NaN
2019-01-03 12:00:00         NaN
2019-01-03 16:00:00         NaN
2019-01-03 20:00:00         NaN
2019-01-04 00:00:00   0.699457
Freq: 4H, dtype: float64

2019-01-01 00:00:00   0.610403
2019-01-01 04:00:00   0.610403
2019-01-01 08:00:00   0.610403
2019-01-01 12:00:00   0.610403
2019-01-01 16:00:00   0.610403
2019-01-01 20:00:00   0.610403
2019-01-02 00:00:00   0.416557
2019-01-02 04:00:00   0.416557
2019-01-02 08:00:00   0.416557
2019-01-02 12:00:00   0.416557
2019-01-02 16:00:00   0.416557
2019-01-02 20:00:00   0.416557
2019-01-03 00:00:00   0.821631
2019-01-03 04:00:00   0.821631
2019-01-03 08:00:00   0.821631
2019-01-03 12:00:00   0.821631
2019-01-03 16:00:00   0.821631
2019-01-03 20:00:00   0.821631
2019-01-04 00:00:00   0.699457
Freq: 4H, dtype: float64

2019-01-01 00:00:00   0.610403
2019-01-01 04:00:00   0.416557
2019-01-01 08:00:00   0.416557
2019-01-01 12:00:00   0.416557
2019-01-01 16:00:00   0.416557
2019-01-01 20:00:00   0.416557
2019-01-02 00:00:00   0.416557
2019-01-02 04:00:00   0.821631
2019-01-02 08:00:00   0.821631
2019-01-02 12:00:00   0.821631
2019-01-02 16:00:00   0.821631
2019-01-02 20:00:00   0.821631
2019-01-03 00:00:00   0.821631
2019-01-03 04:00:00   0.699457
2019-01-03 08:00:00   0.699457
2019-01-03 12:00:00   0.699457
2019-01-03 16:00:00   0.699457
2019-01-03 20:00:00   0.699457
2019-01-04 00:00:00   0.699457
Freq: 4H, dtype: float64

超前/滯後數據 shift(正數): 數值後移--滯後 ,shift(負數): 數值前移--超前

# Four daily observations with random values.
ts = pd.Series(
    np.random.rand(4),
    index=pd.date_range(start="2019/1/1", end="2019/1/4"),
)
print(ts)
print("\n")

# Positive shift: values move one slot later (lag); the first slot becomes NaN.
print(ts.shift(1))
print("\n")

# Negative shift: values move two slots earlier (lead); the last two become NaN.
print(ts.shift(-2))
print("\n")

# Ratio of each value to the value at the previous timestamp
# (subtract 1 to express it as a fractional change).
per = ts.div(ts.shift(1))
print(per)

2019-01-01   0.197884
2019-01-02   0.403093
2019-01-03   0.208341
2019-01-04   0.330873
Freq: D, dtype: float64

2019-01-01         NaN
2019-01-02   0.197884
2019-01-03   0.403093
2019-01-04   0.208341
Freq: D, dtype: float64

2019-01-01   0.208341
2019-01-02   0.330873
2019-01-03         NaN
2019-01-04         NaN
Freq: D, dtype: float64

2019-01-01         NaN
2019-01-02   2.037017
2019-01-03   0.516855
2019-01-04   1.588134
Freq: D, dtype: float64

shift(freq) 加上freq參數 對時間戳進行位移 而不是對數值進行位移

print(ts)
print("\n")

# With `freq`, shift moves the timestamps instead of the values.
print(ts.shift(2, freq="D"))  # two days later
print("\n")

# "min" replaces the single-letter "T" alias, which is deprecated in recent
# pandas; "min" is accepted by old and new releases alike.
print(ts.shift(2, freq="min"))  # two minutes later

2019-01-01   0.197884
2019-01-02   0.403093
2019-01-03   0.208341
2019-01-04   0.330873
Freq: D, dtype: float64

2019-01-03   0.197884
2019-01-04   0.403093
2019-01-05   0.208341
2019-01-06   0.330873
Freq: D, dtype: float64

2019-01-01 00:02:00   0.197884
2019-01-02 00:02:00   0.403093
2019-01-03 00:02:00   0.208341
2019-01-04 00:02:00   0.330873
Freq: D, dtype: float64

相關文章
相關標籤/搜索