import pandas

# Load the contract table; the path is resolved against the current working
# directory.
sub_info = pandas.read_csv("contract.csv")

# NOTE: the bare expressions in this script (e.g. ``type(sub_info)``) are
# notebook-style inspections. They are evaluated, but their value is only
# displayed when run interactively.
type(sub_info)
first_rows = sub_info.head(1)
# Other quick inspections: sub_info.dtypes, sub_info.columns, sub_info.shape,
# sub_info.loc[1]

# Row selection by index label: a label slice (both endpoints included) and
# an explicit list of labels.
sub_info.loc[0:3]
two_five_nine = [2, 5, 9]
sub_info.loc[two_five_nine]

# Column selection: a single column, then a list of columns.
id1 = sub_info["CONTRACTID"]
id1
str1 = ["CONTRACTID", "STATUS"]
id2 = sub_info[str1]
id2

# Collect every column whose name ends with "TIME".
sub_info.columns
columns_list = sub_info.columns.tolist()
time_list = [name for name in columns_list if name.endswith("TIME")]
time_info = sub_info[time_list]

# Boolean mask marking the missing values.
is_value_empty = time_info.isnull()
is_value_empty

# None of the fill calls below modify ``time_info``; each one returns a new
# DataFrame.

# Replace NaN with the string "0".
time_info.fillna("0")
# Replace NaN with the previous value in the column (forward fill).
# ``fillna(method='pad')`` is deprecated; ``ffill()`` is the equivalent.
time_info.ffill()
# The opposite of forward fill: replace NaN with the next value.
time_info.bfill()
# ``limit`` caps how many consecutive NaNs are replaced in each column.
time_info.bfill(limit=1)
# Replace NaN with the column mean.
# NOTE(review): on pandas >= 2.0, mean() raises for non-numeric columns;
# confirm the *TIME columns are numeric.
time_means = time_info.mean()
time_info.fillna(time_means)
# Only fill the columns labelled SUBTIME through OPRTIME (label slice of the
# means, both endpoints included).
time_info.fillna(time_means["SUBTIME":"OPRTIME"])
# Work on an explicit copy of the two columns. Adding a column to a plain
# selection of ``sub_info`` triggers pandas' warning "A value is trying to be
# set on a copy of a slice from a DataFrame".
test_info = sub_info[["CONTRACTID", "STATUS"]].copy()
test_num = test_info["STATUS"] / 10
test_info["test"] = test_num
test_info  # notebook-style display; no effect when run as a script

# The same derived column added directly to the full table.
test_num = sub_info["STATUS"] / 10
sub_info["test"] = test_num
sub_info

# Express every CONTRACTID as a fraction of the largest CONTRACTID.
CONTRACTID_MAX = sub_info["CONTRACTID"].max()
max_percent = sub_info["CONTRACTID"] / CONTRACTID_MAX
sub_info["max_percent"] = max_percent

# Sort by a column. ``inplace=True`` reorders ``sub_info`` itself instead of
# returning a sorted copy.
sub_info.sort_values("CONTRACTID", inplace=True)
sub_info
# ``ascending=False`` sorts in descending order; this replaces the ascending
# order applied just above.
sub_info.sort_values("CONTRACTID", inplace=True, ascending=False)
sub_info