# data.astype()
# When a Series is given a new index, labels that match the original index keep their values, and labels that do not match become NaN.
import pandas as pd
import numpy as np
import string
# Build a DataFrame from a dict of equal-length lists: each key becomes a column.
d1 = dict(
    name=["xiaoming", "xiaogang"],
    age=[12, 15],
    tel=[10010, 11186],
)
t1 = pd.DataFrame(data=d1)
print(t1)

# Build a DataFrame from a list of row dicts; a key missing from a row
# shows up as NaN in that row.
d2 = [
    dict(name="xiaoming", age=10, tel=10010),
    dict(name="xiaogang", tel=10086),
    dict(name="xiaohong", age=13),
]
t2 = pd.DataFrame(data=d2)
print(t2)
# Load the movie data set and list the columns it provides.
csv_data = pd.read_csv("./IMDB-Movie-Data.csv")
print(csv_data.columns)

# Keep only the fields we need, then sort by vote count, highest first.
csv_data = pd.DataFrame(
    csv_data,
    columns=["Rank", "Title", "Director", "Actors", "Votes"],
).sort_values(by="Votes", ascending=False)

# loc selects by index label and column label. Because the rows were
# re-sorted, this is not the first 100 rows: it is every row up to and
# including the one whose index label is 100.
print(csv_data.loc[:100, ["Title", "Votes"]])

# iloc selects by row position and column position, so this really is the
# first 100 rows (columns 1 and 4 are "Title" and "Votes").
print(csv_data.iloc[:100, [1, 4]])
# Build a 3x4 integer frame with row labels A..C and column labels W..Z.
d3 = pd.DataFrame(
    np.arange(12).reshape(3, 4),
    index=list(string.ascii_uppercase[:3]),
    columns=list(string.ascii_uppercase[-4:]),
)
# Integer columns cannot hold NaN. Cast the two target columns to float
# explicitly rather than relying on silent upcasting during assignment,
# which recent pandas releases deprecate.
d3 = d3.astype({"W": "float64", "X": "float64"})
d3.loc["B":"C", "W":"X"] = np.nan
print(d3)
print(pd.notnull(d3["W"]))

# dropna returns a new frame unless inplace=True, so the results are printed
# here; d3 itself is untouched by these three calls.
# Default behaviour: drop every row that contains a NaN.
print(d3.dropna(axis=0))
# how="all": drop a row only when every value in it is NaN.
print(d3.dropna(axis=0, how="all"))
# how="any": drop a row as soon as it contains one NaN.
print(d3.dropna(axis=0, how="any"))
# inplace=True modifies d3 itself: columns containing NaN are removed.
d3.dropna(axis=1, inplace=True)
# pandas skips NaN when computing statistics such as the mean.
# numeric_only=True restricts the mean to the numeric columns; t2 also has
# the string column "name", for which a mean cannot be computed.
# fillna returns a new frame, so the result is printed; t2 is not modified.
print(t2.fillna(t2.mean(numeric_only=True)))
# Fill only the "age" column of t2 itself, using that column's own mean.
t2["age"] = t2["age"].fillna(t2["age"].mean())
print(t2)