1.基於樹模型提取特徵
def select_by_tree_model(X, target):
    """Select features using the importances of a gradient-boosted tree model.

    Fits a ``GradientBoostingClassifier`` wrapped in ``SelectFromModel`` and
    keeps the features whose importance passes the selector's default
    threshold.

    Args:
        X: array-like feature matrix (samples in rows, features in columns).
        target: array-like of class labels, one per sample.

    Returns:
        A ``(scores, indx, result)`` tuple of plain Python lists:
        ``scores`` is the fitted model's importance for every input feature,
        ``indx`` is the boolean keep/drop mask over the input features, and
        ``result`` is ``X`` reduced to the kept columns.

    See:
        http://scikit-learn.org/stable/modules/generated/sklearn.feature_selection.SelectFromModel.html
    """
    import numpy as np
    from sklearn import feature_selection
    from sklearn.ensemble import GradientBoostingClassifier

    matrix = np.array(X)
    labels = np.array(target)
    selector = feature_selection.SelectFromModel(
        GradientBoostingClassifier()
    ).fit(matrix, labels)

    # Use the public accessors: get_support() for the mask and the fitted
    # estimator's own feature_importances_ for the scores.
    indx = selector.get_support().tolist()
    scores = selector.estimator_.feature_importances_.tolist()
    result = selector.transform(matrix).tolist()
    return scores, indx, result
2.基於L1,L2懲罰值提取特徵
def select_by_l1_penalty(X, target):
    """Select features using an L1-penalised logistic regression.

    The L1 penalty drives the coefficients of uninformative features to zero;
    ``SelectFromModel`` then keeps the features whose coefficient magnitude
    passes its default threshold.

    Args:
        X: array-like feature matrix (samples in rows, features in columns).
        target: array-like of class labels, one per sample.

    Returns:
        A ``(scores, indx, result)`` tuple of plain Python lists:
        ``scores`` is the coefficient magnitude of every input feature,
        ``indx`` is the boolean keep/drop mask over the input features, and
        ``result`` is ``X`` reduced to the kept columns.

    See:
        http://scikit-learn.org/stable/modules/generated/sklearn.feature_selection.SelectFromModel.html
    """
    import numpy as np
    from sklearn import feature_selection
    from sklearn.linear_model import LogisticRegression

    matrix = np.array(X)
    labels = np.array(target)

    # The default lbfgs solver rejects penalty="l1"; liblinear supports it.
    model = LogisticRegression(penalty="l1", C=0.1, solver="liblinear")
    selector = feature_selection.SelectFromModel(model).fit(matrix, labels)

    indx = selector.get_support().tolist()
    # SelectFromModel ranks linear models by the L1 norm of each feature's
    # coefficients across classes; report the same quantity as the score.
    scores = np.linalg.norm(selector.estimator_.coef_, ord=1, axis=0).tolist()
    result = selector.transform(matrix).tolist()
    return scores, indx, result
3.遞歸特徵消除法提取特徵
def select_by_rfe(X, target, n_features):
    """Select features by recursive feature elimination (RFE).

    Repeatedly fits a ``LogisticRegression`` and discards the weakest feature
    until ``n_features`` remain.

    Args:
        X: array-like feature matrix (samples in rows, features in columns).
        target: array-like of class labels, one per sample.
        n_features: int, number of features to keep.

    Returns:
        A ``(scores, indx, result)`` tuple of plain Python lists:
        ``scores`` is the RFE ranking of every input feature (kept features
        are ranked 1), ``indx`` is the boolean keep/drop mask over the input
        features, and ``result`` is ``X`` reduced to the kept columns.

    See:
        http://scikit-learn.org/stable/modules/generated/sklearn.feature_selection.RFE.html
    """
    import numpy as np
    from sklearn import feature_selection
    from sklearn.linear_model import LogisticRegression

    matrix = np.array(X)
    labels = np.array(target)
    selector = feature_selection.RFE(
        estimator=LogisticRegression(),
        n_features_to_select=n_features,
    ).fit(matrix, labels)

    scores = selector.ranking_.tolist()
    indx = selector.support_.tolist()
    result = selector.transform(matrix).tolist()
    return scores, indx, result
4.互信息選擇法提取特徵
def select_by_mic(X, target, k):
    """Select the ``k`` features with the highest MIC against the target.

    Scores every feature column with the maximal information coefficient
    (MIC) computed by ``minepy`` and keeps the top ``k`` via ``SelectKBest``.

    Args:
        X: array-like feature matrix (samples in rows, features in columns).
        target: array-like of target values, one per sample.
        k: int, number of features to keep.

    Returns:
        A ``(scores, indx, result)`` tuple of plain Python lists:
        ``scores`` is the MIC of every input feature, ``indx`` is the boolean
        keep/drop mask over the input features, and ``result`` is ``X``
        reduced to the kept columns.

    See:
        http://scikit-learn.org/stable/modules/generated/sklearn.feature_selection.SelectKBest.html
    """
    from minepy import MINE
    import numpy as np
    from sklearn import feature_selection

    matrix = np.array(X)
    labels = np.array(target)

    def mic(x, y):
        """Return the MIC between two one-dimensional samples."""
        estimator = MINE()
        estimator.compute_score(x, y)
        return estimator.mic()

    def mic_scores(features, y):
        # SelectKBest passes the whole matrix; score it column by column.
        return np.array([mic(column, y) for column in features.T])

    selector = feature_selection.SelectKBest(mic_scores, k=k).fit(matrix, labels)
    scores = selector.scores_.tolist()
    indx = selector.get_support().tolist()
    result = selector.transform(matrix).tolist()
    return scores, indx, result
5.利用相關係數選擇特徵
def select_by_correlation(X, target, k):
    """Select the ``k`` features most correlated with the target.

    Scores every feature column with the absolute Pearson correlation
    coefficient against ``target`` and keeps the top ``k`` via
    ``SelectKBest``.

    Args:
        X: array-like feature matrix (samples in rows, features in columns).
        target: array-like of target values, one per sample.
        k: int, number of features to keep.

    Returns:
        A ``(scores, indx, result)`` tuple of plain Python lists:
        ``scores`` is the absolute Pearson coefficient of every input
        feature, ``indx`` is the boolean keep/drop mask over the input
        features, and ``result`` is ``X`` reduced to the kept columns.

    See:
        http://scikit-learn.org/stable/modules/generated/sklearn.feature_selection.SelectKBest.html
    """
    import numpy as np
    from scipy.stats import pearsonr
    from sklearn import feature_selection

    matrix = np.array(X)
    labels = np.array(target)

    def abs_pearson(features, y):
        # pearsonr returns (coefficient, p-value); only the strength of the
        # linear relationship matters here, not its sign.
        return np.array([abs(pearsonr(column, y)[0]) for column in features.T])

    selector = feature_selection.SelectKBest(abs_pearson, k=k).fit(matrix, labels)
    scores = selector.scores_.tolist()
    indx = selector.get_support().tolist()
    result = selector.transform(matrix).tolist()
    return scores, indx, result
6.卡方檢驗法提取特徵
def select_by_chi2(X, target, k):
    """Select the ``k`` best features according to the chi-squared test.

    Args:
        X: array-like feature matrix (samples in rows, features in columns).
            NOTE: scikit-learn's ``chi2`` expects non-negative feature values.
        target: array-like of class labels, one per sample.
        k: int, number of features to keep.

    Returns:
        A ``(scores, indx, result)`` tuple of plain Python lists:
        ``scores`` is the chi-squared statistic of every input feature,
        ``indx`` is the boolean keep/drop mask over the input features, and
        ``result`` is ``X`` reduced to the kept columns.

    See:
        http://scikit-learn.org/stable/modules/generated/sklearn.feature_selection.SelectKBest.html
    """
    import numpy as np
    from sklearn import feature_selection
    from sklearn.feature_selection import chi2

    matrix = np.array(X)
    labels = np.array(target)
    selector = feature_selection.SelectKBest(chi2, k=k).fit(matrix, labels)

    scores = selector.scores_.tolist()
    indx = selector.get_support().tolist()
    result = selector.transform(matrix).tolist()
    return scores, indx, result
7.利用方差選擇特徵
def select_by_variance(X, t):
    """Drop features whose variance does not exceed a threshold.

    Args:
        X: array-like feature matrix (samples in rows, features in columns).
        t: float, variance threshold passed to ``VarianceThreshold``.

    Returns:
        A ``(scores, indx, result)`` tuple of plain Python lists:
        ``scores`` is the variance of every input feature, ``indx`` is the
        boolean keep/drop mask over the input features, and ``result`` is
        ``X`` reduced to the kept columns.

    See:
        http://scikit-learn.org/stable/modules/generated/sklearn.feature_selection.VarianceThreshold.html
    """
    import numpy as np
    from sklearn import feature_selection

    matrix = np.array(X)
    selector = feature_selection.VarianceThreshold(threshold=t).fit(matrix)

    # Per-column variance in one vectorised call; .tolist() yields plain
    # floats, matching the list outputs of the other return values.
    scores = np.var(matrix, axis=0).tolist()
    indx = selector.get_support().tolist()
    result = selector.transform(matrix).tolist()
    return scores, indx, result