pandas中的series數據類型

時間 2019-12-13
原文原文鏈接
import pandas as pd
import numpy as np
import names

'''
寫在前面的話：
    一、Series 與 ndarray 的不同之處在於 Series 帶有索引，而 ndarray 沒有；Series 中的數據必須是一維的，而 ndarray 不一定。
    二、可以把 Series 看作一個定長的有序字典，可以通過 shape、index、values 等屬性獲取 Series 的信息。
'''
# 1. Creating a Series
#
# (1) From a list or a NumPy array
#       - The default index is the integers 0..N-1 (see s1).
#       - A custom index can be supplied through `index` (see s2).
#       - In the original run the Series was not a copy of the array: writing
#         to the Series also changed the source array (see s3).
#         NOTE(review): whether the buffer is shared depends on the pandas
#         version and copy settings - confirm for the version in use.
# (2) From a dict
#       - Keys become the index labels and values become the data (see s4).
n1 = np.array([1, 4, 5, 67, 7, 43])

# Default integer index.
s1 = pd.Series(data=n1)
# print(s1) showed:
#   0     1
#   1     4
#   2     5
#   3    67
#   4     7
#   5    43
#   dtype: int32   (integer width is platform-dependent)

# Same data, explicit string labels.
s2 = pd.Series(data=n1, index=['a', 'b', 'c', 'd', 'e', 'f'])
# print(s2) showed:
#   a     1
#   b     4
#   c     5
#   d    67
#   e     7
#   f    43
#   dtype: int32

# print(n1) showed, before any modification:
#   [ 1  4  5 67  7 43]

# Overwrite the element labelled 2, then keep a second name for the same
# Series object (s3 is an alias of s1, not a new Series).
s1[2] = 100
s3 = s1
# print(s3) showed:
#   0      1
#   1      4
#   2    100
#   3     67
#   4      7
#   5     43
#   dtype: int32

# print(n1) showed, after the assignment above (original run):
#   [  1   4 100  67   7  43]
# Build a Series from a dict: each key becomes an index label and each value
# the corresponding entry.  names.get_last_name() supplies a random English
# surname for every value in 10..14; a repeated surname simply overwrites the
# earlier entry, so the dict may hold fewer than five items.
dict1 = {names.get_last_name(): i for i in range(10, 15)}
s4 = pd.Series(dict1)
# print(s4) showed (surnames are random, so they differ per run):
#   Poole     10
#   Allen     11
#   Davis     12
#   Roland    13
#   Brehm     14
#   dtype: int64
# 2. Indexing a Series
#
# (1) Plain brackets with a label: s5['c'] returns the value stored under
#     label 'c' (see s7).
# (2) Explicit (label) indexing with .loc[]: accepts only labels present in
#     the index, never integer positions (see s8).  Recommended.
# (3) Implicit (positional) indexing with .iloc[]: accepts integer positions
#     regardless of the labels (see s6 and s9).  Recommended.
#
# The labels are passed as a flat list.  Wrapping them in another list
# (index=[list('abcdefgh')]) builds a one-level MultiIndex instead, and label
# lookups then return a one-element Series rather than a scalar.
s5 = pd.Series(np.array([1, 5, 9, 7, 6, 4, 52, 8]), index=list('abcdefgh'))
# print(s5) shows:
#   a     1
#   b     5
#   c     9
#   d     7
#   e     6
#   f     4
#   g    52
#   h     8
#   dtype: int32   (integer width is platform-dependent)

# Positional access.  Plain s5[2] on a string-labelled Series relies on a
# positional fallback that pandas has deprecated, so .iloc is used instead.
s6 = s5.iloc[2]
# print(s6) shows:
#   9

# Label access with plain brackets.
s7 = s5['c']
# print(s7) shows:
#   9

# Label access with the explicit .loc indexer.
s8 = s5.loc['c']
# print(s8) shows:
#   9

# Positional access with the explicit .iloc indexer.
s9 = s5.iloc[2]
# print(s9) shows:
#   9
# 3. Slicing a Series
#
# (1) Slicing works much like list slicing, but .loc[:] and .iloc[:] are the
#     recommended spellings (see s10 and s11); plain [:] also works.
# (2) For a very long Series, .head() / .tail() give the first / last five
#     rows directly.
#
# The labels are passed as a flat list.  Wrapping them in another list
# (index=[list('abcdefgh')]) builds a one-level MultiIndex, so the slices
# would carry tuple labels such as ('b',) instead of plain 'b'.
s5 = pd.Series(np.array([1, 5, 9, 7, 6, 4, 52, 8]), index=list('abcdefgh'))
# print(s5) shows:
#   a     1
#   b     5
#   c     9
#   d     7
#   e     6
#   f     4
#   g    52
#   h     8
#   dtype: int32   (integer width is platform-dependent)

# Label slice: both end labels are included in the result.
s10 = s5.loc['b':'g']
# print(s10) shows:
#   b     5
#   c     9
#   d     7
#   e     6
#   f     4
#   g    52
#   dtype: int32

# Positional slice: the stop position (7) is excluded, as with a list.
s11 = s5.iloc[1:7]
# print(s11) shows:
#   b     5
#   c     9
#   d     7
#   e     6
#   f     4
#   g    52
#   dtype: int32
# 4. About NaN
#
# (1) NaN stands for a missing value but is not the same thing as None: the
#     two have different types, <class 'NoneType'> versus <class 'float'>.
# (2) Missing data can be detected with pd.isnull() / pd.notnull(), or with
#     the Series' own isnull() / notnull() methods.
#
# print(type(None), type(np.nan)) showed:
#   <class 'NoneType'> <class 'float'>

# Both None and np.nan end up as NaN in the resulting float Series.
s12 = pd.Series(data=[1, 2, None, np.nan], index=list('烽火雷電'))
# print(s12) showed:
#   烽    1.0
#   火    2.0
#   雷    NaN
#   電    NaN
#   dtype: float64

# print(pd.isnull(s12)) and print(s12.isnull()) both showed:
#   烽    False
#   火    False
#   雷     True
#   電     True
#   dtype: bool

# print(pd.notnull(s12)) and print(s12.notnull()) both showed:
#   烽     True
#   火     True
#   雷    False
#   電    False
#   dtype: bool

# Keep only the non-missing entries by using the boolean mask as a filter.
# print(s12[s12.notnull()]) showed:
#   烽    1.0
#   火    2.0
#   dtype: float64

# The `name` attribute of a Series: once set, it is shown in the printed
# footer next to the dtype.
s12.name = '風水'
# print(s12) showed:
#   烽    1.0
#   火    2.0
#   雷    NaN
#   電    NaN
#   Name: 風水, dtype: float64