import pandas as pd
import numpy as np

# Load the Titanic training set into a DataFrame named titanic_survival.
# The path is relative, so the script has to be run from the directory that
# contains titanic_train.csv.
titanic_survival = pd.read_csv("titanic_train.csv")
# Pull the "Age" column out as its own Series.
age = titanic_survival["Age"]
# .loc slices by label and includes both endpoints, so this prints the rows
# labelled 0 through 10.
print(age.loc[0:10])

# Boolean mask: True where the age is missing, False where it is present.
# The mask can be used directly as an indexer.
age_is_null = pd.isnull(age)
print(age_is_null)

# Indexing with the mask keeps only the True positions, i.e. the missing ages.
age_null_true = age[age_is_null]
print(age_null_true)

# Number of rows with no recorded age.
age_null_count = len(age_null_true)
print(age_null_count)
# Naive mean over the whole column. The built-in sum() propagates NaN, so this
# prints nan as long as the column contains a missing value.
mean_age = sum(titanic_survival["Age"]) / len(titanic_survival["Age"])
print(mean_age)

# Keep only the rows whose age is present (the inverse of the null mask),
# then average those by hand.
good_ages = titanic_survival["Age"][~age_is_null]
mean_age = sum(good_ages) / len(good_ages)
print(mean_age)

# The short way: let pandas compute the mean of the column itself.
correct_mean = titanic_survival["Age"].mean()
print(correct_mean)
# Average fare for each passenger class, computed with an explicit loop.
passenger_classes = [1, 2, 3]
fares_by_class = {}
for this_class in passenger_classes:
    # Restrict to the rows of the current class, then average their fares.
    pclass_rows = titanic_survival[titanic_survival["Pclass"] == this_class]
    pclass_fares = pclass_rows["Fare"]
    fare_for_class = pclass_fares.mean()
    fares_by_class[this_class] = fare_for_class
# Printed once, after the loop, so the complete mapping is shown.
print(fares_by_class)
# Mean of "Survived" grouped by "Pclass" (the survival rate per class if the
# column is coded 0/1). The aggregation is named by string instead of being
# passed as a NumPy callable, which recent pandas versions warn about.
passenger_survival = titanic_survival.pivot_table(
    index="Pclass", values="Survived", aggfunc="mean"
)
print(passenger_survival)

# Mean age per class. aggfunc is left out because pivot_table averages by
# default.
passenger_age = titanic_survival.pivot_table(index="Pclass", values="Age")
print(passenger_age)

# Per embarkation port: the summed fare and the summed "Survived" column
# (i.e. the number of survivors if the column is coded 0/1).
port_stats = titanic_survival.pivot_table(
    index="Embarked", values=["Fare", "Survived"], aggfunc="sum"
)
print(port_stats)
# axis="columns": discard every column that holds at least one missing value.
drop_na_columns = titanic_survival.dropna(axis="columns")
print(drop_na_columns)

# axis="index" with subset: discard every row whose "Age" or "Sex" is missing;
# missing values in other columns do not cause a row to be dropped.
new_titanic_survival = titanic_survival.dropna(
    axis="index", subset=["Age", "Sex"]
)
print(new_titanic_survival)
# .loc[row_label, column_label] addresses one exact cell of the DataFrame.
row_index_83_age = titanic_survival.loc[83, "Age"]
print(row_index_83_age)

row_index_766_pclass = titanic_survival.loc[766, "Pclass"]
print(row_index_766_pclass)
# Sort the rows by "Age" in descending order and show the first ten.
new2_titanic_survival = titanic_survival.sort_values(by="Age", ascending=False)
print(new2_titanic_survival.head(10))

# The sorted frame still carries the original row labels. reset_index with
# drop=True renumbers the rows from 0 and discards the old labels.
titanic_reindexed = new2_titanic_survival.reset_index(drop=True)
print("---------------")
# Label-based slice, both endpoints included: rows labelled 0 through 10.
print(titanic_reindexed.loc[0:10])