本文是基於Windows系統環境,學習和測試pandas模塊:
Windows 10
PyCharm 2018.3.5 for Windows (exe)
python 3.6.8 Windows x86 executable installer
1. 讀取csv/txt文件
讀取txt文件,設置分隔符爲‘,’,並用header=None指定文件沒有表頭行(第一行按數據讀取)
import pandas as pd

# Read a comma-separated text file. header=None tells pandas the file has no
# header row, so the first line is kept as data and the columns are numbered.
data = pd.read_csv('test.txt', sep=',', header=None)
print(data)
讀取某一行
import pandas as pd

data = pd.read_csv('test.txt')
index = 3
# Select a single row by position. Positions are zero-based, so 3 is the
# fourth data row (the header line is not counted). `.ix` was removed in
# pandas 1.0; `.iloc` is the positional replacement.
print(data.iloc[index])
讀取某一列
import pandas as pd

data = pd.read_csv('test.txt')
# Select the column labelled 'ID'; column labels are case-sensitive.
print(data['ID'])
讀取前5行
import pandas as pd

data = pd.read_csv('user.csv')
# head(5) returns the first five rows; print the result so that running this
# as a plain script actually shows them.
print(data.head(5))
2. 基本操作
刪除/選取某列含有特殊數值的行
import pandas as pd

data = pd.read_csv('user.csv')
print(data)

# Keep or drop the rows whose value in column A appears in the given list.
# To keep only the rows where A equals 1, use: data[data['A'].isin([1])]
# `~` negates the boolean mask, so this keeps the rows where A is not 1.
data = data[~data['A'].isin([1])]
print(data)
刪除/選取某行含有特殊數值的列
# NOTE: df2 is assumed to be an existing DataFrame defined before this snippet.
# Walk the first row (row 0) and collect the labels of the columns holding 3.
cols = [x for i, x in enumerate(df2.columns) if df2.iat[0, i] == 3]
print(cols)

# To keep only those columns instead, use: df2 = df2[cols]
# drop(..., axis=1) removes the collected columns.
df2 = df2.drop(cols, axis=1)
print(df2)
刪除含有空值的行或列
import numpy as np
import pandas as pd

df1 = pd.DataFrame(
    [
        [np.nan, 2, np.nan, 0],
        [3, 4, np.nan, 1],
        [np.nan, np.nan, np.nan, 5],
        [np.nan, 3, np.nan, 4],
    ],
    columns=list('ABCD'),
)
print(df1)
df2 = df1.copy()

# Drop the rows whose value in column A is missing. Testing for NaN directly
# avoids writing a 'null' string sentinel into a float column.
df1 = df1[df1['A'].notna()]
print(df1)

# Drop the columns whose value in the first row (row 0) is missing.
cols = [x for i, x in enumerate(df2.columns) if pd.isna(df2.iat[0, i])]
print(cols)
df2 = df2.drop(cols, axis=1)
print(df2)
3. 統計分析
打印統計詳細信息
import pandas as pd

data = pd.read_csv('user.csv')
# Print the summary statistics that describe() computes for the table.
print(data.describe())
統計中值
import pandas as pd

data = pd.read_csv('user.csv')
# Print the median of the userAge column.
print(data['userAge'].median())
統計某一列不重複的值
import pandas as pd

data = pd.read_csv('user.csv')
# Print the distinct values found in the userName column.
print(data['userName'].unique())
4. 異常處理
中值填充缺失值
import pandas as pd

data = pd.read_csv('user.csv')
# Replace the missing values in userAge with the median of that column.
age_median = data['userAge'].median()
data['userAge'] = data['userAge'].fillna(age_median)
原文:https://blog.csdn.net/qq_32599479/article/details/89361693