博客地址:http://www.cnblogs.com/yudanqu/
1 import numpy as np
2 import pandas as pd
3 from pandas import Series,DataFrame
4
5 #Series
6 s1 = Series([1,2,3], index=['A','B','C'])
7 s2 = Series([4,5,6,7], index=['B','C','D','E'])
8
9 s1 + s2
10 # 結果:A NaN
11 # B 6.0
12 # C 8.0
13 # D NaN
14 # E NaN
15 # dtype: float64
16 # 對應項相加,其餘爲nan
17
18 #DataFrame
19 df1 = DataFrame(np.arange(4).reshape(2,2),index=['A','B'],columns=['BJ','SH'])
20 df2 = DataFrame(np.arange(9).reshape(3,3),index=['A','B','C'],columns=['BJ','SH','GZ'])
21 df1 + df2
22 #結果: BJ GZ SH
23 # A 0.0 NaN 2.0
24 # B 5.0 NaN 7.0
25 # C NaN NaN NaN
26
27 df3 = DataFrame([[1,2,3],[4,5,np.nan],[7,8,9]],index=['A','B','C'],columns=['c1','c2','c3'])
28 '''
29 c1 c2 c3
30 A 1 2 3.0
31 B 4 5 NaN
32 C 7 8 9.0
33 '''
34 df3.sum()
35 #結果:c1 12.0
36 # c2 15.0
37 # c3 12.0
38 # dtype: float64
39 #sum默認skipna=True,會跳過nan,因此nan與實數求和並不返回nan
40 df3.sum(axis=1) #求得每一行的和,即A、B、C;默認axis=0,因此不寫axis時表示按列求和
41
42 df3.min() #求最小值,max同理。整體與sum相同,不考慮nan
43
44 df3.describe() #統計內部數據
45
46
47 # 排序功能
48 #注:randn:標準正態分佈
49 #    rand:[0,1)上的均勻分佈
50 #Series:
51 s1 = Series(np.random.randn(10))
52 s2 = s1.sort_values() # 根據values排序
53 # 默認參數ascending=True表示升序,倒序可以改參數爲ascending=False
54 s2.sort_index() # 根據index升序排列
55 #DataFrame:
56 df1 = DataFrame(np.random.randn(40).reshape(8,5),columns=['A','B','C','D','E'])
57 df1['A'].sort_values() #僅僅是對這一列排序,若想整體根據這一列進行排序要對df1進行排序
58 df1.sort_values('A') #則爲所有排序,默認升序
59
60 df1.sort_values('A')[['A','D']] # 根據某列排序並輸出所須要的幾列
61
62
63 #重命名
64 df1.index = df1.index.map(str) #修改dataframe的index
65 df1.rename(index=str.upper,columns=str.lower) #通過傳入函數改變全部標籤(返回新的DataFrame,不修改df1)
66 df1.rename(index={'A':'a'},columns={'B':'b'}) #通過字典修改某一項
67 # map函數的參數可以是自己定義的函數
68
69
70 #dataframe的merge操做
71 pd.merge(df1,df2,on='name',how='inner') # on表示以哪一列的列名作爲合併依據,默認爲None;how參數若寫left,則以左側df的數據爲準來顯示,右側沒有的補爲nan;outer是right和left的結合,將全部數據都輸出(此行僅爲示意,要求兩個df都含有'name'列)
72 df1 = DataFrame({'key':['A','B','C'],'data_set_1':[1,2,3]})
73 df2 = DataFrame({'key':['X','Y','Z'],'data_set_2':[4,5,6]})
74 pd.merge(df1,df2) # 這時的結果返回爲空,因爲merge只對key值相同的行進行操作
75 # 當兩者有同名的columns(例如此例的key)且其中的值相同時,才可以進行merge
76
77
78 #concatenate和combine
79
80 #~~concatenate:
81
82 #1、array
83 arr1 = np.arange(9).reshape(3,3)
84 arr2 = np.arange(9).reshape(3,3)
85 np.concatenate([arr1,arr2]) #通過列表把數組放在一起
'''
86 output:array([[0, 1, 2],
87 [3, 4, 5],
88 [6, 7, 8],
89 [0, 1, 2],
90 [3, 4, 5],
91 [6, 7, 8]])
92 注:其中concatenate的參數包括axis,可以決定如何連接
'''
93
94 #2、Series
95 s1 = Series([1,2,3],index=['X','Y','Z'])
96 s2 = Series([4,5],index=['A','B'])
97 pd.concat([s1,s2]) #同樣有axis參數,爲0則縱向連接在下面,爲1則橫向連接
98
99 #3、dataframe
100 df1 = DataFrame(np.random.randn(4,3),columns=['X','Y','Z'])
101 df2 = DataFrame(np.random.randn(3,3),columns=['X','Y','A'])
102 pd.concat([df1,df2])
'''
103 Out:
104 A X Y Z
105 0 NaN -0.060523 0.879124 1.673622
106 1 NaN 0.734367 0.708085 -0.133981
107 2 NaN 0.461922 -2.186110 -4.473558
108 3 NaN 1.553153 -2.256533 -0.381862
109 0 1.304371 -0.275638 1.362799 NaN
110 1 -0.357986 -0.273505 0.430566 NaN
111 2 1.406862 1.453295 -0.681261 NaN
'''
112
113 #~~combine:
114
115 #1、Series:
116 s1 = Series([2,np.nan,4,np.nan],index=['A','B','C','D'])
117 s2 = Series([1,2,3,4],index=['A','B','C','D'])
118 s1.combine_first(s2) #用s2中對應的值填充s1中爲nan的項
119
120 #2、DataFrame:
121 #和series幾乎同樣
作者:漁單渠
博客地址:http://www.cnblogs.com/yudanqu/