import pandas as pd
import random
import numpy as np
# Build a small demo frame: n_rows consecutive daily timestamps starting at
# 2000-01-01, columns 'A' and 'B', filled with standard-normal draws.
# The generator is not seeded, so the values differ on every run.
n_rows = 5
n_cols = 2
df = pd.DataFrame(
    np.random.randn(n_rows, n_cols),
    index=pd.date_range('1/1/2000', periods=n_rows),
    columns=['A', 'B'],
)
# Scale by 10 and truncate toward zero to get small integers. The vectorized
# form keeps the DatetimeIndex and the column labels untouched; returning
# plain lists from DataFrame.apply makes pandas rebuild the frame and may
# (depending on the pandas version) drop the date index, which the
# offset-based rolling(window='3d') example further down relies on.
df = (df * 10).astype(int)
df
|
A |
B |
2000-01-01 |
-18 |
3 |
2000-01-02 |
5 |
-4 |
2000-01-03 |
-2 |
8 |
2000-01-04 |
0 |
1 |
2000-01-05 |
-18 |
3 |
pct_change
## pct_change() computes the fractional change (e.g. 0.5 means +50%; the result is not multiplied by 100) between each element and the one `periods` rows before it
df.pct_change(periods=1) # b{t}=(a{t}-a{t-1})/a{t-1}
|
A |
B |
2000-01-01 |
NaN |
NaN |
2000-01-02 |
-1.277778 |
-2.333333 |
2000-01-03 |
-1.400000 |
-3.000000 |
2000-01-04 |
-1.000000 |
-0.875000 |
2000-01-05 |
-inf |
2.000000 |
df.pct_change(periods=2) # b{t}=(a{t}-a{t-2})/a{t-2}
|
A |
B |
2000-01-01 |
NaN |
NaN |
2000-01-02 |
NaN |
NaN |
2000-01-03 |
-0.888889 |
1.666667 |
2000-01-04 |
-1.000000 |
-1.250000 |
2000-01-05 |
8.000000 |
-0.625000 |
Covariance
df.cov()
|
A |
B |
A |
114.80 |
-17.85 |
B |
-17.85 |
18.70 |
df.A.cov(df.B)
-17.849999999999998
Correlation
df.corr()
|
A |
B |
A |
1.000000 |
-0.385253 |
B |
-0.385253 |
1.000000 |
Data ranking
df.rank()
|
A |
B |
2000-01-01 |
1.5 |
3.5 |
2000-01-02 |
5.0 |
1.0 |
2000-01-03 |
3.0 |
5.0 |
2000-01-04 |
4.0 |
2.0 |
2000-01-05 |
1.5 |
3.5 |
df.rank(axis=1)
|
A |
B |
2000-01-01 |
1.0 |
2.0 |
2000-01-02 |
2.0 |
1.0 |
2000-01-03 |
1.0 |
2.0 |
2000-01-04 |
1.0 |
2.0 |
2000-01-05 |
1.0 |
2.0 |
method parameter of rank() (how tied values are ranked; the default is average):
average : average rank of tied group
min : lowest rank in the group
max : highest rank in the group
first : ranks assigned in the order they appear in the array
Window Functions
cumsum
df
|
A |
B |
2000-01-01 |
-18 |
3 |
2000-01-02 |
5 |
-4 |
2000-01-03 |
-2 |
8 |
2000-01-04 |
0 |
1 |
2000-01-05 |
-18 |
3 |
df.cumsum()
|
A |
B |
2000-01-01 |
-18 |
3 |
2000-01-02 |
-13 |
-1 |
2000-01-03 |
-15 |
7 |
2000-01-04 |
-15 |
8 |
2000-01-05 |
-33 |
11 |
rolling
df
|
A |
B |
2000-01-01 |
-18 |
3 |
2000-01-02 |
5 |
-4 |
2000-01-03 |
-2 |
8 |
2000-01-04 |
0 |
1 |
2000-01-05 |
-18 |
3 |
r=df.rolling(window=2)
r.mean()
|
A |
B |
2000-01-01 |
NaN |
NaN |
2000-01-02 |
-6.5 |
-0.5 |
2000-01-03 |
1.5 |
2.0 |
2000-01-04 |
-1.0 |
4.5 |
2000-01-05 |
-9.0 |
2.0 |
r.count()
|
A |
B |
2000-01-01 |
1.0 |
1.0 |
2000-01-02 |
2.0 |
2.0 |
2000-01-03 |
2.0 |
2.0 |
2000-01-04 |
2.0 |
2.0 |
2000-01-05 |
2.0 |
2.0 |
r.max()
|
A |
B |
2000-01-01 |
NaN |
NaN |
2000-01-02 |
5.0 |
3.0 |
2000-01-03 |
5.0 |
8.0 |
2000-01-04 |
0.0 |
8.0 |
2000-01-05 |
0.0 |
3.0 |
count() |
Number of non-null observations |
sum() |
Sum of values |
mean() |
Mean of values |
median() |
Arithmetic median of values |
min() |
Minimum |
max() |
Maximum |
std() |
Bessel-corrected sample standard deviation |
var() |
Unbiased variance |
skew() |
Sample skewness (3rd moment) |
kurt() |
Sample kurtosis (4th moment) |
quantile() |
Sample quantile (value at %) |
apply() |
Generic apply |
cov() |
Unbiased covariance (binary) |
corr() |
Correlation (binary) |
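win_type selects the window weighting function (e.g. 'triang', 'gaussian'); with the default None all points in the window are weighted equally.
The 'on' parameter names a datetime-like column of the DataFrame on which to compute the rolling window, rather than the default of the index.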
df
|
A |
B |
2000-01-01 |
-18 |
3 |
2000-01-02 |
5 |
-4 |
2000-01-03 |
-2 |
8 |
2000-01-04 |
0 |
1 |
2000-01-05 |
-18 |
3 |
df.rolling(window='3d',min_periods=3).sum() ## sum over the trailing 3-day window; NaN until the window holds 3 observations
|
A |
B |
2000-01-01 |
NaN |
NaN |
2000-01-02 |
NaN |
NaN |
2000-01-03 |
-15.0 |
7.0 |
2000-01-04 |
3.0 |
5.0 |
2000-01-05 |
-20.0 |
12.0 |
expanding
df
|
A |
B |
2000-01-01 |
-18 |
3 |
2000-01-02 |
5 |
-4 |
2000-01-03 |
-2 |
8 |
2000-01-04 |
0 |
1 |
2000-01-05 |
-18 |
3 |
df.expanding().mean() ## expanding window: mean of all rows from the first one up to and including the current row
|
A |
B |
2000-01-01 |
-18.00 |
3.000000 |
2000-01-02 |
-6.50 |
-0.500000 |
2000-01-03 |
-5.00 |
2.333333 |
2000-01-04 |
-3.75 |
2.000000 |
2000-01-05 |
-6.60 |
2.200000 |
Exponentially Weighted Windows (ewm)
df
|
A |
B |
2000-01-01 |
-18 |
3 |
2000-01-02 |
5 |
-4 |
2000-01-03 |
-2 |
8 |
2000-01-04 |
0 |
1 |
2000-01-05 |
-18 |
3 |
df.ewm(alpha=0.9).mean()
|
A |
B |
2000-01-01 |
-18.000000 |
3.000000 |
2000-01-02 |
2.909091 |
-3.363636 |
2000-01-03 |
-1.513514 |
6.873874 |
2000-01-04 |
-0.151215 |
1.586859 |
2000-01-05 |
-16.215282 |
2.858699 |