一、讀取數據
import pandas

# Load the food table from a CSV file in the working directory.
csv_path = "food_info.csv"
food_info = pandas.read_csv(csv_path)

# read_csv returns a DataFrame; print its type to confirm.
print(type(food_info))  # <class 'pandas.core.frame.DataFrame'>
二、數據類型
三、數據顯示函數
# Each bare expression below is meant to be evaluated on its own (e.g. in a
# notebook cell) so that its value is displayed.

# --- Previewing the table ---
food_info.head()     # first 5 rows (the default)
food_info.head(3)    # first 3 rows
food_info.tail(3)    # last 3 rows
food_info.columns    # column labels
food_info.shape      # (number of rows, number of columns)

# --- Selecting rows by index label with .loc ---
food_info.loc[0]                # row whose index label is 0
food_info.loc[3:6]              # rows labelled 3 through 6; .loc slices include both ends
food_info.loc[83, "NDB_No"]     # NDB_No value of the row labelled 83

# --- Selecting columns by name ---
food_info["NDB_No"]             # a single column
columns = ["Zinc_(mg)", "Copper_(mg)"]
food_info[columns]              # several columns at once

# --- Selecting every column measured in grams ---
col_names = food_info.columns.tolist()
gram_columns = [name for name in col_names if name.endswith("(g)")]
gram_df = food_info[gram_columns]
四、數據操作
# Divide every value in the column by 1000; +, - and * work the same way.
food_info["Iron_(mg)"] / 1000

# Multiply two columns of the same length element by element.
water_energy = food_info["Water_(g)"] * food_info["Energ_Kcal"]

# Add a new column to the DataFrame.
iron_grams = food_info["Iron_(mg)"] / 1000
food_info["Iron_(g)"] = iron_grams

# Largest value in a column.
food_info["Energ_Kcal"].max()

# Sort by a column. inplace=True reorders food_info itself instead of
# returning a new DataFrame; ascending defaults to True.
food_info.sort_values("Sodium_(mg)", inplace=True, ascending=False)

# Sort, then renumber the index from 0. reset_index returns a new DataFrame,
# so the result must be bound to a name; drop=True discards the old index
# instead of keeping it as a column.
new_titanic_survival = titanic_survival.sort_values("Age", ascending=False)
new_titanic_survival = new_titanic_survival.reset_index(drop=True)
五、缺失值處理
# Boolean mask that is True wherever the value is missing.
# NOTE(review): `age` was never defined in the original snippet; it is assumed
# to be the "Age" column -- confirm.
age = titanic_survival["Age"]
pandas.isnull(age)

# Mean of the column; Series.mean skips missing values by default.
titanic_survival["Age"].mean()

# Dropping data that contains missing values.
titanic_survival.dropna(axis=1)  # drop every column that has a missing value
titanic_survival.dropna(axis=0, subset=["Age", "Sex"])  # drop rows missing "Age" or "Sex"
六、簡單的統計函數
# Mean of "Survived" for each value of "Pclass".
# aggfunc defaults to the mean; the string "mean" is used instead of np.mean
# because passing the NumPy callable is deprecated in recent pandas and would
# require a numpy import this file does not have.
passenger_survival = titanic_survival.pivot_table(
    index="Pclass",
    values="Survived",
    aggfunc="mean",
)
七、自定義函數
def get_hundredth_item(column):
    """Return the entry of ``column`` whose index label is 99."""
    return column.loc[99]


# DataFrame.apply passes each column to the function by default (axis=0), so
# the result holds one value per column. The helper has its own name so that
# this assignment does not overwrite the function with its result.
hundredth_row = titanic_survival.apply(get_hundredth_item)


def which_class(row):
    """Map the ``Pclass`` value of ``row`` to a readable class name.

    Returns "Unknown" when the value is missing, "First Class" /
    "Second Class" / "Third Class" for 1 / 2 / 3, and None for anything else.
    """
    pclass = row["Pclass"]
    if pandas.isnull(pclass):
        return "Unknown"
    if pclass == 1:
        return "First Class"
    if pclass == 2:
        return "Second Class"
    if pclass == 3:
        return "Third Class"
    return None


# axis=1 makes apply pass each row to the function instead of each column.
classes = titanic_survival.apply(which_class, axis=1)
八、Series結構
from pandas import Series

# Build a Series from rt_scores, using film_names as the index labels.
series_custom = Series(rt_scores, index=film_names)

# Indexing with a list of labels selects several entries at once.
series_custom[["Minions (2015)", "Leviathan (2014)"]]