3-6 merge操作

 

In [1]:
import pandas as pd
In [6]:
# Two four-row demo frames. They share exactly one column, `key`, with
# identical values, so every row of `left` has one matching row in `right`.
left = pd.DataFrame(
    data={
        'A': ['A0', 'A1', 'A2', 'A3'],
        'B': ['B0', 'B1', 'B2', 'B3'],
        'key': ['K0', 'K1', 'K2', 'K3'],
    }
)
right = pd.DataFrame(
    data={
        'C': ['C0', 'C1', 'C2', 'C3'],
        'D': ['D0', 'D1', 'D2', 'D3'],
        'key': ['K0', 'K1', 'K2', 'K3'],
    }
)
In [7]:
left
Out[7]:
 
  A B key
0 A0 B0 K0
1 A1 B1 K1
2 A2 B2 K2
3 A3 B3 K3
In [8]:
right
Out[8]:
 
  C D key
0 C0 D0 K0
1 C1 D1 K1
2 C2 D2 K2
3 C3 D3 K3
 

merge:合併

In [10]:
# With no `on` argument, merge joins on the columns the two frames have in
# common (here only `key`); the shared column appears once in the result.
pd.merge(left,right)
Out[10]:
 
  A B key C D
0 A0 B0 K0 C0 D0
1 A1 B1 K1 C1 D1
2 A2 B2 K2 C2 D2
3 A3 B3 K3 C3 D3
In [12]:
# Name the join column explicitly: rows are matched on equal `key` values.
pd.merge(left,right,on='key')
Out[12]:
 
  A B key C D
0 A0 B0 K0 C0 D0
1 A1 B1 K1 C1 D1
2 A2 B2 K2 C2 D2
3 A3 B3 K3 C3 D3
In [13]:
# Rebuild the demo frames so that they now share two columns, `key1` and
# `key2`. All key values still line up row for row between the two frames.
left = pd.DataFrame(
    data={
        'A': ['A0', 'A1', 'A2', 'A3'],
        'B': ['B0', 'B1', 'B2', 'B3'],
        'key1': ['K0', 'K1', 'K2', 'K3'],
        'key2': ['K0', 'K1', 'K2', 'K3'],
    }
)
right = pd.DataFrame(
    data={
        'C': ['C0', 'C1', 'C2', 'C3'],
        'D': ['D0', 'D1', 'D2', 'D3'],
        'key1': ['K0', 'K1', 'K2', 'K3'],
        'key2': ['K0', 'K1', 'K2', 'K3'],
    }
)
In [14]:
left
Out[14]:
 
  A B key1 key2
0 A0 B0 K0 K0
1 A1 B1 K1 K1
2 A2 B2 K2 K2
3 A3 B3 K3 K3
In [16]:
right
Out[16]:
 
  C D key1 key2
0 C0 D0 K0 K0
1 C1 D1 K1 K1
2 C2 D2 K2 K2
3 C3 D3 K3 K3
In [17]:
# With no `on` argument, merge joins on every column common to both frames,
# so here both `key1` and `key2` are used and each appears once in the result.
pd.merge(left,right)
Out[17]:
 
  A B key1 key2 C D
0 A0 B0 K0 K0 C0 D0
1 A1 B1 K1 K1 C1 D1
2 A2 B2 K2 K2 C2 D2
3 A3 B3 K3 K3 C3 D3
In [18]:
# Join on `key1` only. `key2` exists in both frames but is not a join key, so
# it is kept from each side and disambiguated as `key2_x` (from left) and
# `key2_y` (from right).
pd.merge(left,right,on='key1')
Out[18]:
 
  A B key1 key2_x C D key2_y
0 A0 B0 K0 K0 C0 D0 K0
1 A1 B1 K1 K1 C1 D1 K1
2 A2 B2 K2 K2 C2 D2 K2
3 A3 B3 K3 K3 C3 D3 K3
In [19]:
# Join on both columns: a row matches only when `key1` and `key2` are both equal.
pd.merge(left,right,on=['key1','key2'])
Out[19]:
 
  A B key1 key2 C D
0 A0 B0 K0 K0 C0 D0
1 A1 B1 K1 K1 C1 D1
2 A2 B2 K2 K2 C2 D2
3 A3 B3 K3 K3 C3 D3
 

使key2的值不完全一樣:把right最後一行的key2改爲K4

In [20]:
# Redefine `right` so that its last row carries key2 == 'K4'. The key pair
# of that row is now ('K3', 'K4'); every other value is as in the earlier cell.
right = pd.DataFrame(
    data={
        'C': ['C0', 'C1', 'C2', 'C3'],
        'D': ['D0', 'D1', 'D2', 'D3'],
        'key1': ['K0', 'K1', 'K2', 'K3'],
        'key2': ['K0', 'K1', 'K2', 'K4'],
    }
)
In [21]:
# Join on both keys with the default inner join: only key pairs present in
# both frames survive, so the row whose `key2` values differ is dropped.
pd.merge(left,right,on=['key1','key2'])
Out[21]:
 
  A B key1 key2 C D
0 A0 B0 K0 K0 C0 D0
1 A1 B1 K1 K1 C1 D1
2 A2 B2 K2 K2 C2 D2
In [22]:
# how='outer' keeps the union of key pairs from both frames; columns that
# have no match on the other side are filled with NaN. (The default,
# how='inner', keeps only the intersection.)
pd.merge(left,right,on=['key1','key2'],how='outer')
Out[22]:
 
  A B key1 key2 C D
0 A0 B0 K0 K0 C0 D0
1 A1 B1 K1 K1 C1 D1
2 A2 B2 K2 K2 C2 D2
3 A3 B3 K3 K3 NaN NaN
4 NaN NaN K3 K4 C3 D3
In [23]:
# indicator=True appends a `_merge` column recording where each result row
# came from: 'both', 'left_only' or 'right_only'.
pd.merge(left,right,on=['key1','key2'],how='outer',indicator=True)
Out[23]:
 
  A B key1 key2 C D _merge
0 A0 B0 K0 K0 C0 D0 both
1 A1 B1 K1 K1 C1 D1 both
2 A2 B2 K2 K2 C2 D2 both
3 A3 B3 K3 K3 NaN NaN left_only
4 NaN NaN K3 K4 C3 D3 right_only
In [24]:
# how='left' keeps every row of `left`; rows without a match in `right` get
# NaN in the right-hand columns. how='right' is the mirror image.
pd.merge(left,right,on=['key1','key2'],how='left')
Out[24]:
 
  A B key1 key2 C D
0 A0 B0 K0 K0 C0 D0
1 A1 B1 K1 K1 C1 D1
2 A2 B2 K2 K2 C2 D2
3 A3 B3 K3 K3 NaN NaN
相關文章
相關標籤/搜索