python--Numpy and Pandas 筆記01

  博客地址:http://www.cnblogs.com/yudanqu/


"""NumPy and pandas notes, part 01.

A flat, notebook-style walkthrough of label-aligned arithmetic, basic
statistics, sorting, renaming, merging, concatenating and combining.
Bare expressions are kept as in the original notes: run them in an
interactive session to see their results.
"""

import numpy as np
import pandas as pd
from pandas import Series, DataFrame

# ---------------------------------------------------------------------------
# Arithmetic
# ---------------------------------------------------------------------------

# Series
s1 = Series([1, 2, 3], index=['A', 'B', 'C'])
s2 = Series([4, 5, 6, 7], index=['B', 'C', 'D', 'E'])

s1 + s2
# Result:
#   A    NaN
#   B    6.0
#   C    8.0
#   D    NaN
#   E    NaN
#   dtype: float64
# Entries with matching labels are added; every other label becomes NaN.

# DataFrame
df1 = DataFrame(np.arange(4).reshape(2, 2),
                index=['A', 'B'], columns=['BJ', 'SH'])
df2 = DataFrame(np.arange(9).reshape(3, 3),
                index=['A', 'B', 'C'], columns=['BJ', 'SH', 'GZ'])
df1 + df2
# Result:
#       BJ  GZ   SH
#   A  0.0 NaN  2.0
#   B  5.0 NaN  7.0
#   C  NaN NaN  NaN

# ---------------------------------------------------------------------------
# Basic statistics
# ---------------------------------------------------------------------------

df3 = DataFrame([[1, 2, 3], [4, 5, np.nan], [7, 8, 9]],
                index=['A', 'B', 'C'], columns=['c1', 'c2', 'c3'])
#      c1  c2   c3
#   A   1   2  3.0
#   B   4   5  NaN
#   C   7   8  9.0

df3.sum()
# Result:
#   c1    12.0
#   c2    15.0
#   c3    12.0
#   dtype: float64
# NaN entries are skipped by sum(), so the column total is not NaN.

df3.sum(axis=1)  # Sum of each row (A, B, C); the default axis=0 sums columns.

df3.min()  # Minimum per column; max works the same way. NaN is ignored.

df3.describe()  # Summary statistics of the data.

# ---------------------------------------------------------------------------
# Sorting
# ---------------------------------------------------------------------------
# Note: np.random.randn samples the standard normal distribution,
#       np.random.rand samples uniformly from [0, 1).

# Series
s1 = Series(np.random.randn(10))
s2 = s1.sort_values()  # Sort by values.
# ascending=True is the default; pass ascending=False for descending order.
s2.sort_index()  # Sort by index, ascending.

# DataFrame
df1 = DataFrame(np.random.randn(40).reshape(8, 5),
                columns=['A', 'B', 'C', 'D', 'E'])
df1['A'].sort_values()  # Sorts only that one column (returns a Series).
df1.sort_values('A')  # Sorts the whole frame by column A, ascending.

df1.sort_values('A')[['A', 'D']]  # Sort by a column, then pick columns.

# ---------------------------------------------------------------------------
# Renaming
# ---------------------------------------------------------------------------

df1.index = df1.index.map(str)  # Replace the DataFrame's index.
df1.rename(index=str.upper, columns=str.lower)  # Map a function over labels.
df1.rename(index={'A': 'a'}, columns={'B': 'b'})  # Rename single labels.
# The mapper may also be a function you define yourself.

# ---------------------------------------------------------------------------
# Merging DataFrames
# ---------------------------------------------------------------------------

df1 = DataFrame({'key': ['A', 'B', 'C'], 'data_set_1': [1, 2, 3]})
df2 = DataFrame({'key': ['X', 'Y', 'Z'], 'data_set_2': [4, 5, 6]})

# `on` names the column used as the join key (default None: the columns the
# two frames have in common). With how='left' every row of the left frame is
# kept and values missing on the right are filled with NaN; how='outer' is
# the union of 'left' and 'right' and keeps every row.
# The original notes joined on a non-existent column 'name' before the frames
# were defined, which raises KeyError; the shared column here is 'key'.
pd.merge(df1, df2, on='key', how='inner')

pd.merge(df1, df2)
# Both results are empty: merge only pairs rows whose key values are equal,
# and these two frames share the column name 'key' but none of its values.

# ---------------------------------------------------------------------------
# Concatenate and combine
# ---------------------------------------------------------------------------

# ~~ concatenate ~~

# 1. ndarray
arr1 = np.arange(9).reshape(3, 3)
arr2 = np.arange(9).reshape(3, 3)
np.concatenate([arr1, arr2])  # Pass the arrays together in a list.
# Output:
#   array([[0, 1, 2],
#          [3, 4, 5],
#          [6, 7, 8],
#          [0, 1, 2],
#          [3, 4, 5],
#          [6, 7, 8]])
# Note: the `axis` argument of concatenate decides the joining direction.

# 2. Series
s1 = Series([1, 2, 3], index=['X', 'Y', 'Z'])
s2 = Series([4, 5], index=['A', 'B'])
pd.concat([s1, s2])  # Also takes `axis`: 0 stacks below, 1 joins sideways.

# 3. DataFrame
df1 = DataFrame(np.random.randn(4, 3), columns=['X', 'Y', 'Z'])
df2 = DataFrame(np.random.randn(3, 3), columns=['X', 'Y', 'A'])
pd.concat([df1, df2])
# Sample output from the original notes (values are random; the column
# order may differ between pandas versions):
#             A         X         Y         Z
#   0       NaN -0.060523  0.879124  1.673622
#   1       NaN  0.734367  0.708085 -0.133981
#   2       NaN  0.461922 -2.186110 -4.473558
#   3       NaN  1.553153 -2.256533 -0.381862
#   0  1.304371 -0.275638  1.362799       NaN
#   1 -0.357986 -0.273505  0.430566       NaN
#   2  1.406862  1.453295 -0.681261       NaN

# ~~ combine ~~

# 1. Series
s1 = Series([2, np.nan, 4, np.nan], index=['A', 'B', 'C', 'D'])
# The original notes bound this second Series to `s1` again, discarding the
# NaN-bearing one; it is the filler and must be `s2`.
s2 = Series([1, 2, 3, 4], index=['A', 'B', 'C', 'D'])
s1.combine_first(s2)  # Fill the missing values of s1 with values from s2.

# 2. DataFrame
# Works almost exactly like the Series case.



作者:漁單渠
博客地址:http://www.cnblogs.com/yudanqu/
相關文章
相關標籤/搜索