# Random sampling from a DataFrame with DataFrame.sample.
df.sample(10, replace=True)  # draw 10 rows with replacement
df.sample(3)  # draw 3 rows without replacement
df.sample(frac=0.5)  # sample by proportion of rows
# Weighted sampling: one random integer weight per row, so the weight
# vector always matches the number of rows being sampled from.
df.sample(frac=10, replace=True, weights=np.random.randint(1, 10, len(df)))
df.sample(3, axis=1)  # sample columns (variables) instead of rows
# Random number recipes with numpy.random.
np.random.rand(3, 2)  # uniform samples on [0, 1) with the given shape
np.random.randn(2, 5)  # standard normal samples with the given shape
# randint(low, high=None, size=None): with only `low` given, integers are
# drawn from [0, low); omitting `size` returns a single integer.
np.random.randint(2, size=10)
np.random.bytes(10)  # returns random bytes
a = np.arange(10)
np.random.shuffle(a)  # shuffle in place
a = np.arange(9).reshape(3, 3)
np.random.shuffle(a)  # for a multi-dimensional array only the first axis is shuffled
np.random.permutation(10)  # random permutation; also works on multi-dimensional sequences
np.random.permutation(10).reshape(2, 5)
np.random.seed(1000)  # seed the global generator
np.random.normal(2, 3, [5, 2])  # Gaussian distribution; other distributions are listed at:
# http://docs.scipy.org/doc/numpy-1.10.1/reference/routines.random.html
# SciPy can generate these random samples as well, and ships the matching
# statistical tests: draw 100 values from N(5, 3) and run Shapiro-Wilk on them.
np.random.seed(12345678)
x = scipy.stats.norm(loc=5, scale=3).rvs(size=100)
scipy.stats.shapiro(x)
# http://docs.scipy.org/doc/scipy-0.17.0/reference/stats.html
# gather: reshape a frame from wide to long.
def gather(df, key, value, cols):
    """Unpivot the columns in ``cols`` into key/value rows.

    Args:
        df: The wide DataFrame to reshape.
        key: Name of the output column holding the former column names.
        value: Name of the output column holding the former cell values.
        cols: Column labels to unpivot; every other column of ``df`` is
            kept as an identifier column.

    Returns:
        The long-format DataFrame produced by ``pandas.melt``.
    """
    id_vars = [col for col in df.columns if col not in cols]
    return pandas.melt(
        df,
        id_vars=id_vars,
        value_vars=cols,
        var_name=key,
        value_name=value,
    )


# The function above goes wide -> long ("gather"); the opposite direction,
# long -> wide, is "spread".
# Wide -> long: keep E and F as identifier columns, unpivot A and C.
pd.melt(df, id_vars=['E', 'F'], value_vars=['A', 'C'])
# spread (long -> wide): D becomes the index, E the columns, F the cell values.
# The old pd.pivot(index_series, columns_series, values_series) call form is
# no longer accepted, so DataFrame.pivot with explicit keywords is used.
df.pivot(index='D', columns='E', values='F')
df3 = df2.pivot(index='D', columns='variable', values='value')
df3.reset_index(level=0, inplace=True)  # move the index back into a regular column
# Join a list of strings into a single comma-separated string.
[",".join(['a', 'b', 'd'])]
# Concatenate E within each F group; this requires the values to be strings.
df[['E', 'F']].groupby('F')['E'].apply(lambda x: "{%s}" % ', '.join(x))
# Cast every cell to str first so non-string columns can be joined too.
# NOTE: DataFrame.applymap is deprecated since pandas 2.1 in favour of DataFrame.map.
df[['E', 'F']].applymap(str).groupby('E')['F'].apply(lambda x: ', '.join(x))
import random
import string

# Example frame: y holds 0..9 and x holds a comma-separated string of
# 2 to 5 distinct lowercase ASCII letters per row.
# string.ascii_lowercase replaces the Python 2-only string.lowercase.
df2 = pd.DataFrame(range(10), columns=['y'])
df2["x"] = [
    ",".join(random.sample(string.ascii_lowercase, random.randint(2, 5)))
    for _ in range(10)
]
# Uses the sample data from example 20 (df2, whose x column holds
# comma-separated strings).
# Split x on commas and stack the pieces so each item gets its own row,
# with the matching y value carried along.
df3 = (
    pd.DataFrame(df2.x.str.split(',').tolist(), index=df2.y)
    .stack()
    .reset_index(level=0)
)
df3.columns = ["y", "x"]
# Distinct combinations of the F and E columns.
df[["F", "E"]].drop_duplicates()
# Bin column A using the edges -1, 0, 1.
pd.cut(df.A, range(-1, 2, 1))