Data Mining Feature Extraction Methods

1. Feature extraction with tree models

    # Feature selection with a tree-based model
    import numpy as np
    from sklearn import feature_selection
    from sklearn.ensemble import GradientBoostingClassifier

    def select_by_tree_model(X, target):
        matrix = np.array(X)
        target = np.array(target)
        # Fit a gradient-boosted tree ensemble and keep the features whose
        # importance exceeds the default threshold (the mean importance)
        temp = feature_selection.SelectFromModel(GradientBoostingClassifier()).fit(matrix, target)
        indx = temp.get_support().tolist()                      # boolean mask of selected features
        scores = temp.estimator_.feature_importances_.tolist()  # importance of every feature
        result = temp.transform(matrix).tolist()                # data reduced to the selected features
        return scores, indx, result

    # X: array-like
    # target: array-like
    # http://scikit-learn.org/stable/modules/generated/sklearn.feature_selection.SelectFromModel.html
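
A minimal usage sketch for the wrapper above, with scikit-learn's bundled iris dataset assumed purely for illustration:

    from sklearn.datasets import load_iris

    # Toy data: 150 samples, 4 features
    X, y = load_iris(return_X_y=True)
    scores, mask, X_selected = select_by_tree_model(X, y)
    print(scores)              # one importance value per original feature
    print(mask)                # True for the features kept by SelectFromModel
    print(len(X_selected[0]))  # how many features survived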

2. Feature extraction with L1/L2 regularization penalties

    # Feature selection based on L1/L2 regularization penalties
    import numpy as np
    from sklearn import feature_selection
    from sklearn.linear_model import LogisticRegression

    def select_by_l1_penalty(X, target):
        matrix = np.array(X)
        target = np.array(target)
        # An L1-penalized logistic regression drives weak coefficients to zero;
        # SelectFromModel keeps the features whose weights are large enough.
        # The liblinear solver is chosen because it supports the L1 penalty.
        estimator = LogisticRegression(penalty="l1", C=0.1, solver="liblinear")
        temp = feature_selection.SelectFromModel(estimator).fit(matrix, target)
        indx = temp.get_support().tolist()                           # boolean mask of selected features
        scores = np.abs(temp.estimator_.coef_).sum(axis=0).tolist()  # aggregated |coefficient| per feature
        result = temp.transform(matrix).tolist()                     # data reduced to the selected features
        return scores, indx, result

    # X: array-like
    # target: array-like
    # http://scikit-learn.org/stable/modules/generated/sklearn.feature_selection.SelectFromModel.html
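
The inverse regularization strength C controls how aggressive the L1 penalty is: the smaller C is, the more coefficients are driven to exactly zero, so fewer features survive. A small sketch of that effect, calling SelectFromModel directly and using the iris data as an illustrative assumption:

    import numpy as np
    from sklearn.datasets import load_iris
    from sklearn.feature_selection import SelectFromModel
    from sklearn.linear_model import LogisticRegression

    X, y = load_iris(return_X_y=True)
    for C in (0.01, 0.1, 1.0):
        lr = LogisticRegression(penalty="l1", C=C, solver="liblinear")
        sel = SelectFromModel(lr).fit(X, y)
        # A stronger penalty (smaller C) generally keeps fewer features
        print(C, int(np.sum(sel.get_support())))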

3. Feature extraction with recursive feature elimination (RFE)

    # Recursive feature elimination (RFE)
    import numpy as np
    from sklearn import feature_selection
    from sklearn.linear_model import LogisticRegression

    def select_by_rfe(X, target, n_features):
        matrix = np.array(X)
        target = np.array(target)
        # Repeatedly fit the estimator and drop the weakest features
        # until only n_features remain
        temp = feature_selection.RFE(estimator=LogisticRegression(),
                                     n_features_to_select=n_features).fit(matrix, target)
        scores = temp.ranking_.tolist()           # rank of every feature (1 = selected)
        indx = temp.support_.tolist()             # boolean mask of selected features
        result = temp.transform(matrix).tolist()  # data reduced to the selected features
        return scores, indx, result

    # X: array-like
    # target: array-like
    # n_features: int
    # http://scikit-learn.org/stable/modules/generated/sklearn.feature_selection.RFE.html
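
If the right number of features is not known in advance, scikit-learn also offers RFECV, which picks it by cross-validation. A minimal sketch, again using the iris data purely for illustration:

    from sklearn.datasets import load_iris
    from sklearn.feature_selection import RFECV
    from sklearn.linear_model import LogisticRegression

    X, y = load_iris(return_X_y=True)
    selector = RFECV(estimator=LogisticRegression(max_iter=1000), cv=5).fit(X, y)
    print(selector.n_features_)  # number of features chosen by cross-validation
    print(selector.support_)     # boolean mask of the selected features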

4. Feature extraction with mutual information

    # Feature selection with the maximal information coefficient (MIC)
    from minepy import MINE
    import numpy as np
    from sklearn import feature_selection

    def mic(x, y):
        # MIC between a single feature column and the target; the constant 0.5
        # is only a placeholder in the (score, p-value) pair
        m = MINE()
        m.compute_score(x, y)
        return (m.mic(), 0.5)

    def select_by_mic(X, target, k):
        matrix = np.array(X)
        target = np.array(target)
        # Score every column by its MIC with the target and keep the k best
        score_func = lambda X, Y: np.array(list(map(lambda x: mic(x, Y), X.T))).T[0]
        temp = feature_selection.SelectKBest(score_func, k=k).fit(matrix, target)
        scores = temp.scores_.tolist()            # MIC score of every feature
        indx = temp.get_support().tolist()        # boolean mask of selected features
        result = temp.transform(matrix).tolist()  # data reduced to the selected features
        return scores, indx, result

    # X: array-like
    # target: array-like
    # k: int
    # http://scikit-learn.org/stable/modules/generated/sklearn.feature_selection.SelectKBest.html
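
minepy is a third-party package; if it is not installed, scikit-learn's own mutual_info_classif estimator plugs straight into SelectKBest as an alternative. A minimal sketch, with the iris data assumed for illustration:

    from sklearn.datasets import load_iris
    from sklearn.feature_selection import SelectKBest, mutual_info_classif

    X, y = load_iris(return_X_y=True)
    selector = SelectKBest(mutual_info_classif, k=2).fit(X, y)
    print(selector.scores_)        # estimated mutual information per feature
    print(selector.get_support())  # boolean mask of the 2 selected features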

5. Feature selection with the correlation coefficient

    # Feature selection with the Pearson correlation coefficient
    import numpy as np
    from scipy.stats import pearsonr
    from sklearn import feature_selection

    def select_by_pearson(X, target, k):
        matrix = np.array(X)
        target = np.array(target)
        # Score every column by the absolute Pearson correlation with the
        # target and keep the k best
        score_func = lambda X, Y: np.array(list(map(lambda x: abs(pearsonr(x, Y)[0]), X.T)))
        temp = feature_selection.SelectKBest(score_func, k=k).fit(matrix, target)
        scores = temp.scores_.tolist()            # |Pearson r| of every feature
        indx = temp.get_support().tolist()        # boolean mask of selected features
        result = temp.transform(matrix).tolist()  # data reduced to the selected features
        return scores, indx, result

    # X: array-like
    # target: array-like
    # k: int
    # http://scikit-learn.org/stable/modules/generated/sklearn.feature_selection.SelectKBest.html
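
Pearson correlation only captures linear dependence between a feature and the target, so it is most natural when the target is continuous. A minimal usage sketch for the wrapper above, with a synthetic regression dataset assumed for illustration:

    from sklearn.datasets import make_regression

    # 100 samples, 10 features, only 3 of which actually drive the target
    X, y = make_regression(n_samples=100, n_features=10, n_informative=3, random_state=0)
    scores, mask, X_selected = select_by_pearson(X, y, k=3)
    print([round(s, 3) for s in scores])  # |Pearson r| per feature
    print(mask)                           # the 3 features with the largest |r|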

6. Feature extraction with the chi-squared test

    # Feature selection with the chi-squared test
    import numpy as np
    from sklearn import feature_selection
    from sklearn.feature_selection import chi2

    def select_by_chi2(X, target, k):
        matrix = np.array(X)
        target = np.array(target)
        # Score every column with the chi-squared statistic against the
        # target classes and keep the k best
        temp = feature_selection.SelectKBest(chi2, k=k).fit(matrix, target)
        scores = temp.scores_.tolist()            # chi-squared statistic of every feature
        indx = temp.get_support().tolist()        # boolean mask of selected features
        result = temp.transform(matrix).tolist()  # data reduced to the selected features
        return scores, indx, result

    # X: array-like
    # target: array-like
    # k: int
    # http://scikit-learn.org/stable/modules/generated/sklearn.feature_selection.SelectKBest.html
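
Note that chi2 only accepts non-negative feature values (counts, frequencies, or similarly scaled data). A minimal usage sketch for the wrapper above, assuming the iris data, whose measurements are all non-negative:

    from sklearn.datasets import load_iris

    X, y = load_iris(return_X_y=True)
    scores, mask, X_selected = select_by_chi2(X, y, k=2)
    print(scores)  # chi-squared statistic per feature
    print(mask)    # the 2 features with the largest statistic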

7. Feature selection by variance

    # Feature selection by variance
    import numpy as np
    from sklearn import feature_selection

    def select_by_variance(X, t):
        matrix = np.array(X)
        # Unsupervised filter: keep only the columns whose variance exceeds t
        temp = feature_selection.VarianceThreshold(threshold=t).fit(matrix)
        scores = temp.variances_.tolist()         # variance of every feature
        indx = temp.get_support().tolist()        # boolean mask of selected features
        result = temp.transform(matrix).tolist()  # data reduced to the selected features
        return scores, indx, result

    # X: array-like
    # t: float
    # http://scikit-learn.org/stable/modules/generated/sklearn.feature_selection.VarianceThreshold.html
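
Unlike the previous methods, the variance filter is unsupervised: it never looks at the target and simply drops near-constant columns. A minimal sketch with a hand-made matrix whose third column is constant (an illustrative assumption):

    import numpy as np

    # The third column is constant, so its variance is 0 and it gets removed
    X = np.array([[1.0, 2.0, 5.0],
                  [2.0, 4.0, 5.0],
                  [3.0, 6.0, 5.0]])
    scores, mask, X_selected = select_by_variance(X, t=0.0)
    print(scores)      # per-feature variances
    print(mask)        # [True, True, False]
    print(X_selected)  # the data without the constant column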

Reference: https://www.kesci.com/ci
