# Exploratory descriptive statistics for the data in "Smarket.csv".
#
# The statistics below are bare expressions, so their values are only shown
# when the code is run interactively (REPL / notebook cell); as a plain
# script they are computed and discarded.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import datasets, preprocessing
from sklearn.model_selection import learning_curve
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from pandas import read_csv

# Load the CSV from the current working directory.
data_set = read_csv("Smarket.csv")
# Raw values of the whole frame as a NumPy array.
data = data_set.values[:, :]

# NOTE(review): the frame-wide reductions below (mean / var / std / quantile)
# are applied to every column; if the CSV contains non-numeric columns they
# may raise on recent pandas versions -- confirm against the file's schema.

# Mean: whole frame, then the "Lag1" column only.
np.mean(data_set)
np.mean(data_set["Lag1"])

# Median of "Lag1".
np.median(data_set["Lag1"])

# Variance.
np.var(data_set)

# Standard deviation.
np.std(data_set)

# Range (peak to peak, i.e. max - min) of "Lag1".
np.ptp(data_set["Lag1"])

# Covariance matrix of "Lag1" and "Lag2".
np.cov(data_set["Lag1"], data_set["Lag2"])

# Correlation-coefficient matrix of "Lag1" and "Lag2".
np.corrcoef(data_set["Lag1"], data_set["Lag2"])

# Quartiles: 25th, 50th and 75th percentiles.
q1 = data_set.quantile(0.25)
q2 = data_set.quantile(0.5)
q3 = data_set.quantile(0.75)

# Summary statistics.
data_set.describe()

# Show all columns when a frame is displayed.
pd.set_option('display.max_columns', None)
# Show all rows when a frame is displayed.
# (The function is `set_option`; `pd.set_options` does not exist.)
pd.set_option('display.max_rows', None)