# Data-processing tools.
import numpy as np
import pandas as pd
from pandas import Series, DataFrame

# Upper-case three-letter month abbreviation -> month number (1-12).
months = {
    'JAN': 1,
    'FEB': 2,
    'MAR': 3,
    'APR': 4,
    'MAY': 5,
    'JUN': 6,
    'JUL': 7,
    'AUG': 8,
    'SEP': 9,
    'OCT': 10,
    'NOV': 11,
    'DEC': 12,
}

# Candidate name ("Last, First" form) -> party affiliation.
parties = {
    'Bachmann, Michelle': 'Republican',
    'Romney, Mitt': 'Republican',
    'Obama, Barack': 'Democrat',
    "Roemer, Charles E. 'Buddy' III": 'Reform',
    'Pawlenty, Timothy': 'Republican',
    'Johnson, Gary Earl': 'Libertarian',
    'Paul, Ron': 'Republican',
    'Santorum, Rick': 'Republican',
    'Cain, Herman': 'Republican',
    'Gingrich, Newt': 'Republican',
    'McCotter, Thaddeus G': 'Republican',
    'Huntsman, Jon': 'Republican',
    'Perry, Rick': 'Republican',
}
數據下載地址: 鏈接:https://pan.baidu.com/s/19_-s3Xv_fiYkMtIca-stdw 提取碼:iwjt
# Load the contribution records from the local data directory.
data = pd.read_csv(filepath_or_buffer='./data/usa_election.txt')
# Preview the first five rows.
data.head(n=5)
字段解釋
cmte_id :候選人ID
cand_nm :候選人姓名
contbr_nm : 捐贈人姓名
contbr_st :捐贈人所在州
contbr_employer : 捐贈人所在公司
contbr_occupation : 捐贈人職業
contb_receipt_amt :捐贈數額(美元)
contb_receipt_dt : 捐款的日期
新建一列 party,表示各個候選人所在的黨派
# Look up each candidate's party and store it in a new 'party' column;
# names that are not keys of ``parties`` become NaN.
candidate_names = data['cand_nm']
data['party'] = candidate_names.map(parties)
data.head()
party這一列中有哪些元素
# List the distinct values present in the party column.
# Recorded output:
#   array(['Republican', 'Democrat', 'Reform', 'Libertarian'], dtype=object)
party_column = data['party']
party_column.unique()
統計party列中各個元素出現次數。value_counts()是Series的方法,不需要參數,返回一個帶有每個元素出現次數的Series
# Count how many rows belong to each party.
# Recorded output:
#   Democrat       292400
#   Republican     237575
#   Reform           5364
#   Libertarian       702
#   Name: party, dtype: int64
party_series = data['party']
party_series.value_counts()
查看各個黨派收到的政治獻金總數contb_receipt_amt
# Total contribution amount received by each party.
# ``axis`` is not passed: 0 is already the default, and the keyword is
# deprecated in DataFrame.groupby, so spelling it out only triggers a
# warning (or an error once it is removed).
# Recorded output:
#   party
#   Democrat       8.105758e+07
#   Libertarian    4.132769e+05
#   Reform         3.390338e+05
#   Republican     1.192255e+08
#   Name: contb_receipt_amt, dtype: float64
data.groupby(by='party')['contb_receipt_amt'].sum()
查看每天各個黨派收到的政治獻金總數contb_receipt_amt
# Total contribution amount per party per receipt date.
# ``axis`` is not passed: 0 is already the default, and the keyword is
# deprecated in DataFrame.groupby.
data.groupby(by=['party', 'contb_receipt_dt'])['contb_receipt_amt'].sum()
將表中日期格式轉換爲'yyyy-mm-dd'
def transform_date(d):
    """Convert a 'DD-MON-YY' date string to 'YYYY-MM-DD'.

    Args:
        d: Hyphen-separated date text made of a day, an upper-case
            three-letter month abbreviation and a two-digit year,
            e.g. '20-JUN-11'.

    Returns:
        The date as a 'YYYY-MM-DD' string. The century is always taken
        to be 20xx.

    Raises:
        ValueError: If ``d`` does not split into exactly three parts.
        KeyError: If the month abbreviation is not a key of ``months``.
    """
    day, month, year = d.split('-')
    # Zero-pad month and day so the result really is yyyy-mm-dd and sorts
    # chronologically as plain text ('2011-06-20', not '2011-6-20').
    return '20%s-%02d-%s' % (year, months[month], day.zfill(2))


# Rewrite the receipt-date column in place using the converted strings.
date = data['contb_receipt_dt'].map(transform_date)
data['contb_receipt_dt'] = date
data.head()
查看老兵(捐獻者職業爲 DISABLED VETERAN)主要支持誰:查看老兵們捐贈給誰的錢最多
# Boolean mask selecting contributions whose donor occupation is exactly
# 'DISABLED VETERAN'.
is_disabled_veteran = data['contbr_occupation'] == 'DISABLED VETERAN'
# Keep only those rows and preview them.
old_bing_df = data.loc[is_disabled_veteran]
old_bing_df.head()
對競選者進行分組
# Total amount the disabled-veteran donors gave to each candidate.
# ``axis`` is not passed: 0 is already the default, and the keyword is
# deprecated in DataFrame.groupby.
old_bing_df.groupby(by='cand_nm')['contb_receipt_amt'].sum()
找出捐贈金額的最大值
# Largest single contribution amount in the table.
contribution_amounts = data['contb_receipt_amt']
contribution_amounts.max()
找出候選人的捐贈者中,捐贈金額最大的人的職業以及捐獻額。通過 query("查詢條件") 來查找捐獻人職業
# Select the row(s) holding the largest contribution amount.
# The maximum is passed to query() through an ``@`` variable reference
# rather than being formatted with '%f': '%f' rounds to six decimal places,
# so the textual literal can differ from the stored float and match nothing.
max_amt = data['contb_receipt_amt'].max()
data.query('contb_receipt_amt == @max_amt')