# Python描述性統計numpy — descriptive statistics with NumPy and pandas

 

 

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import datasets,preprocessing
from sklearn.model_selection import learning_curve
from sklearn.model_selection import train_test_split,GridSearchCV,cross_val_score
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score,classification_report,confusion_matrix

from pandas import read_csv
# Load the Smarket table from the CSV file in the current working directory.
data_set = read_csv("Smarket.csv")

# Raw 2-D NumPy array holding every row and every column of the table.
data = data_set.values[:, :]

# Mean: per-column over the numeric columns, then for the single column Lag1.
# The pandas reducers are called directly with numeric_only=True because
# np.mean/np.var/np.std on a whole DataFrame forward axis=None to pandas,
# which on pandas >= 2.0 either fails on text columns or reduces the whole
# table to one scalar instead of giving one value per column.
# NOTE(review): Smarket.csv is assumed to contain a non-numeric column
# (e.g. "Direction") -- confirm against the actual file.
data_set.mean(numeric_only=True)
np.mean(data_set["Lag1"])

# Median of Lag1.
np.median(data_set["Lag1"])

# Variance per numeric column; ddof=0 keeps NumPy's population-variance
# convention that the original np.var call used.
data_set.var(ddof=0, numeric_only=True)

# Standard deviation per numeric column (population, ddof=0, as np.std).
data_set.std(ddof=0, numeric_only=True)

# Range (maximum minus minimum) of Lag1.
np.ptp(data_set["Lag1"])


# Covariance matrix of the two series Lag1 and Lag2 (np.cov).
np.cov(data_set["Lag1"],data_set["Lag2"])

# Correlation-coefficient matrix of Lag1 and Lag2 (np.corrcoef).
np.corrcoef(data_set["Lag1"],data_set["Lag2"])

# Quartiles of every numeric column (25th, 50th and 75th percentiles).
# numeric_only=True is passed explicitly: pandas >= 2.0 defaults it to False
# and then fails on non-numeric columns.
# NOTE(review): assumes Smarket.csv has a text column such as "Direction" --
# confirm against the actual file.
q1 = data_set.quantile(0.25, numeric_only=True)
q2 = data_set.quantile(0.5, numeric_only=True)
q3 = data_set.quantile(0.75, numeric_only=True)

# Summary statistics produced by pandas for the table.
data_set.describe()

# Show every column when a DataFrame is printed (None removes the limit).
pd.set_option('display.max_columns', None)
# Show every row when a DataFrame is printed. The function is pd.set_option;
# pandas has no pd.set_options, so the original call raised AttributeError.
pd.set_option('display.max_rows', None)
# 相關文章
# 相關標籤/搜索