Lisp like R (Native support)

原文連接

R函數式的列表(Lisp表達方式)

Emacs的Repl開發體驗C-x C-e, 爽到根本停不下來!

Emacs C-x C-e 執行R的S表達式
  • el-get-install ESS
  • C-c C-k 打開R的Repl, C-c C-l eval當前文件緩衝到Repl裏面
  • C-x C-e fun r lisp!
;; Emacs config: defines `ess-eval-sexp' (evaluate the sexp before point in the
;; ESS REPL) and hooks ess-mode to bind it to C-x C-e.
;; NOTE(review): line breaks were lost in extraction, so the leading ';;'
;; comments out the whole defun; the code after the Chinese comment below must
;; be split back onto its own lines before it can be loaded.
;; 將這裏的配置放到啓動腳本init.el或者是`.emacs` (defun ess-eval-sexp (vis) (interactive "P") (save-excursion (backward-sexp) (let ((end (point))) (forward-sexp) (ess-eval-region (point) end vis "Eval sexp")))) (add-hook 'ess-mode-hook (lambda () (define-key global-map (kbd "C-x C-e") 'ess-eval-sexp) ))
lambda
# Lisp-style lambdas in R: `function` is the lambda form and the quoted
# operator '+' is called prefix-style; ((function (x) x) (1)) applies an
# identity lambda to 1.
# NOTE(review): extraction collapsed this example onto one line, so the
# leading '#' comments out all of the code.
# define (function (y) (function (x) ('+' (x, y)))) # call ((function (x) x) (1)) #=> [1] 1
let
## Emulating Lisp `let`: a function's default arguments act as local bindings
## (a later default may reference an earlier parameter, e.g. y=('*' (x, 2))),
## and magrittr %>% pipes chain the steps. The final `reg` example computes an
## ordinary-least-squares fit, (X'X)^-1 X'y, on the `launch` data.
## NOTE(review): collapsed onto one line by extraction; the leading '##'
## comments out all of the code.
## 用高階函數 和 %>% 管道來 代替let, function(a=111,b=222,c=function(...){...} ) { ... } ## function的默認參數就是一個局部變量: function(a=1, b=2) <=> let[a 1 b 2] ((function (x, y=(function (i) ('*' (i, 2))) ) (y (x))) (2)) #=> [1] 4 ## 用強大的函數管道 (library (magrittr)) ((c (1, 2, 3)) %>% (function (x) (Map ((function (x) ('+' (x, 100))), x))) %>% (function (x) (Reduce ('+', x)) ) ) #=> [1] 306 ## let複用前面的變量定義 ((function (x, y=('*' (x, 2))) y) (100)) #=> [1] 200 ## 綜合例子: function裏面的默認參數,當let來用,能夠用前面定義的變量(x,y=x),可是不能覆蓋前面定義的變量(x,x=1) ((function (y, x, mx=(as.matrix (x)), cx=(cbind (Intercept=1, mx))) ('%*%' (('%*%' ((solve ('%*%' ((t (cx)), cx))), (t (cx)))), y)) ) -> reg) (reg (y=(launch$distress_ct), x=(launch [3]))) ## [,1] ## Intercept 4.30158730 ## temperature -0.05746032
if
# `if` called prefix-style: 'if'(0, 1 == 1, 2 == 1). The condition 0 is falsy,
# so the else branch (2 == 1) is evaluated, giving FALSE.
('if' (0, ('==' (1, 1)), ('==' (2, 1)))) #=> [1] FALSE
plot
# Scatter plot of two random-normal samples of size 10, called prefix-style;
# the trailing comment shows the same call with an extra named arg (type='b').
('plot' (('rnorm' (10)), ('rnorm' (10)))) # 加了額外的參數 ('plot' (('rnorm' (10)), ('rnorm' (10)), type='b'))
Reduce
# Fold 1:10 with `*`: the factorial of 10 (3628800).
(Reduce ('*', 1:10))
Filter
# Filter keeps elements whose predicate value is truthy (non-zero): x %% 2
# yields x for odd numbers and 0 for even ones, so only the odds survive.
((function (x) ('if' (('%%' (x, 2)), x, 0))) (2)) #=> [1] 0 # call (Filter ((function (x) ('if' (('%%' (x, 2)), x, 0))), 1:10)) #=> [1] 1 3 5 7 9
Map
# Map applies the lambda (+100) to each element of 1:3, returning a list of
# 101, 102, 103.
(Map ((function (x) ('+' (x, 100))), 1:3)) # => [[1]] [1] 101 [[2]] [1] 102 [[3]] [1] 103
vector
## 1-D atomic vectors: c() builds them, [i] indexes (1-based), and `->`
## assigns the result to a variable.
## NOTE(review): collapsed onto one line; the leading '##' comments out the
## code.
## 1d: 1維 # 若是原本是前綴的表達方式的函數,引號'c'能夠省略,function除外必須加引號 (c (1, 1, 3)) #=> [1] 1 1 3 ((c (1, 8, 3)) [2]) #=> [1] 8 ((c ("A", "B", "C")) -> defvar) #=> [1] "A" "B" "C"
factor
# Factors: `levels` must be unique, and data values absent from `levels`
# become <NA>. The second example relabels levels "B"/"M" with human-readable
# benign/malignant tumor labels via `labels=`.
# NOTE(review): collapsed onto one line; the leading '#' comments out the
# code.
# levels是不能重複出現的 (factor ((c ("1", "1", "3", "11", "9", "8")), levels=(c ("A", "B", "C", "AA", "BB", "CC")))) #=> [1] <NA> <NA> <NA> <NA> <NA> <NA> Levels: A B C AA BB CC ### 替換一下數據名稱,把B替換爲"良性腫塊" ==>> 因子的意義: 賦予更多的標籤的意義 ((factor (wbcd$diagnosis, levels=(c ("B", "M")), labels=(c ("良性腫塊", "惡性腫塊")))) -> wbcd$diagnosis) #=> diagnosis radius_mean texture_mean perimeter_mean area_mean smoothness_mean 1 惡性腫塊 17.990 10.38 122.80 1001.0 0.11840 2 惡性腫塊 20.570 17.77 132.90 1326.0 0.08474 21 良性腫塊 13.080 15.71 85.63 520.0 0.10750
list
## 1-D generic list: elements may have mixed types (numeric, string, logical).
## NOTE(review): collapsed onto one line; the leading '##' comments out the
## code.
## 1d: 1維 (list (11, "aa", FALSE)) #=> [[1]] [1] 11 [[2]] [1] "aa" [[3]] [1] FALSE
array
## N-dimensional arrays: array(1:12) is 1-D; array(1:12, c(2, 3, 2)) reshapes
## the same data into a 2x3x2 array (the transcript shows one 2x3 slice).
## NOTE(review): collapsed onto one line; the leading '##' comments out the
## code.
## nd: N維 (1:12) ##=> [1] 1 2 3 4 5 6 7 8 9 10 11 12 ##class: [1] "integer" (array (1:12)) #=>class [1] "array" ##=> [1] 1 2 3 4 5 6 7 8 9 10 11 12 (array (1:12, (c (2, 3, 2)))) #=>class [1] "array" ## [,1] [,2] [,3] ## [1,] 7 9 11 ## [2,] 8 10 12 ##
data.frame (函數內賦值參數用: x=123)
## 2-D data frames: build pt_data from named column vectors, then index by
## [row, col], by column position, by negative index (column removal), by
## $name, and by column range.
## NOTE(review): collapsed onto one line; the leading '##' comments out the
## code.
## 2d: 2維 ((data.frame ( ID=(c (11,12,13)), Name=(c ("Devin","Edward","Wenli")), Gender=(c ("M","M","F")), Birthdate=(c ("1984-12-29","1983-5-6","1986-8-8")))) -> pt_data) #=> ID Name Gender Birthdate 1 11 Devin M 1984-12-29 2 12 Edward M 1983-5-6 3 13 Wenli F 1986-8-8 ## get: (pt_data [1, 2]) #=> 第一行,第二列 [1] Devin Levels: Devin Edward Wenli (pt_data [,3]) #=> 只是第三列 [1] M M F Levels: F M ((pt_data [-1]) [-2]) #=> 去除第一,而後再去除第二列     Name Birthdate 1 Devin 1984-12-29 2 Edward 1983-5-6 3 Wenli 1986-8-8 (pt_data$Birthdate) #=> 取某一列 [1] 1984-12-29 1983-5-6 1986-8-8 Levels: 1983-5-6 1984-12-29 1986-8-8 (pt_data [2:3]) # 取範圍 Name Gender 1 Devin M 2 Edward M 3 Wenli F
matrix (函數內賦值參數用: x=123)
## Matrices and linear algebra: matrix() with nrow=/ncol=, cbind() to join
## columns, t() transpose, scalar multiplication, elementwise matrix addition
## (shapes must match), %*% matrix multiplication (A's columns must equal B's
## rows), and solve() for the inverse of a square matrix.
## NOTE(review): collapsed onto one line; the leading '##' comments out the
## code.
## 2d: 2維 (matrix ((c (1, 2, 1, 3, 5, 8)), nrow=2)) #=> 2行->3列 [,1] [,2] [,3] [1,] 1 1 5 [2,] 2 3 8 (matrix ((c (1, 2, 1, 3, 5, 8)), ncol=2)) #=> [,1] [,2] [1,] 1 3 [2,] 2 5 [3,] 1 8 (matrix ((c (1, 2, 4, 3)), ncol=1)) #=> 單列矩陣 [,1] [1,] 1 [2,] 2 [3,] 4 [4,] 3 (matrix ((c (1, 2, 4, 3)), nrow=1)) #=> 單行矩陣 [,1] [,2] [,3] [,4] [1,] 1 2 4 3 (cbind ((c (1, 1, 1)), (c (1, 0, 1)), (c (0, 1, 0)))) #=> 拼接矩陣 ##     [,1] [,2] [,3] ## [1,] 1 1 0 ## [2,] 1 0 1 ## [3,] 1 1 0 ## =========== 矩陣線性代數 ## 矩陣轉置: 若是參數裏面只有一個參數時,而且是函數調用的時候,能夠省略參數標記的一對括號,以下=> (t (matrix ((c (1, 2, 1, 3, 5, 8)), ncol=2))) ## [,1] [,2] ## [1,] 1 3 ## [2,] 2 5 ## [3,] 1 8 ## ==>> ## [,1] [,2] [,3] ## [1,] 1 2 1 ## [2,] 3 5 8 ## 矩陣的標量運算 ('*' (10, (matrix ((c (1, 2, 1, 3, 5, 8)), ncol=2)))) ## [,1] [,2] ## [1,] 10 30 ## [2,] 20 50 ## [3,] 10 80 ## ## 矩陣求和: 必須結構相同才能相加 ('+' ((matrix ((c (9, 2, 3, 8, 1, 4)), ncol=2)), (matrix ((c (0, 3, 5, 3, 7, 2)), ncol=2)))) # A + B ## [,1] [,2] ## [1,] 9 8 ## [2,] 2 1 ## [3,] 3 4 ## [,1] [,2] ## [1,] 0 3 ## [2,] 3 7 ## [3,] 5 2 ## =======>>>>> ## [,1] [,2] ## [1,] 9 11 ## [2,] 5 8 ## [3,] 8 6 ## ## 矩陣乘法: A的列數必須等於B的行數 <=> 列的加權求和 ('%*%' ((matrix ((c (1, 4, 3, 0, 1, 2)), ncol=2)), (matrix ((c (7, 8)), ncol=1)))) # A * B ## [,1] ## [1,] 7 ## [2,] 36 ## [3,] 37 ## ## 矩陣求逆: 必需是正方形的 (solve (matrix ((c (1, 4, 3, 0, 1, 2, 1, 6, 8)), ncol=3))) ## [,1] [,2] [,3] ## [1,] 1 0 1 ## [2,] 4 1 6 ## [3,] 3 2 8 ## ===>>> ## [,1] [,2] [,3] ## [1,] -4 2 -1 ## [2,] -14 5 -2 ## [3,] 5 -2 1 ## 
csv 表格數據文件
# Round-trip a data frame through CSV with write.csv/read.csv, then read a
# remote CSV over HTTP (stringsAsFactors=FALSE) into `wbcd`.
# NOTE(review): only the write.csv call before the first '#' is live; the
# read.csv calls were swallowed into the comment when line breaks were lost.
(write.csv (pt_data, file="my-data-frame.csv")) # cat my-data-frame.csv #=> "","ID","Name","Gender","Birthdate" "1",11,"Devin","M","1984-12-29" "2",12,"Edward","M","1983-5-6" "3",13,"Wenli","F","1986-8-8" (read.csv ("my-data-frame.csv")) #=> X ID Name Gender Birthdate 1 1 11 Devin M 1984-12-29 2 2 12 Edward M 1983-5-6 3 3 13 Wenli F 1986-8-8 # => read from web: ((read.csv ("http://127.0.0.1:8003/wisc_bc_data.csv", stringsAsFactors=FALSE)) -> wbcd)
table記錄頻數的方法(每一類)
# Frequency counts per class of the diagnosis column.
# (Fixed comment: the original labeled both classes "B"; B = benign mass,
# M = malignant mass.)
(table (wbcd$diagnosis)) # B M # B = benign mass, M = malignant mass # 357 212 
round & prop.table & table計算頻率百分比
# Class proportions as percentages: prop.table scales the counts to sum to 1,
# '*' 100 converts to percent, and round keeps one decimal place.
(round (('*' ((prop.table (table (wbcd$diagnosis))) ,100)), digits=1)) ## 良性腫塊 惡性腫塊 ## 62.7 37.3 # 百分比計算
summary 總結數據特徵,極值, 細胞核的3種特徵: 最小, 最大, 平均值,中間值等
## summary() of three selected nucleus features: min, quartiles, median, mean,
## max per column.
## NOTE(review): collapsed onto one line; the leading '##' comments out the
## code.
## 3.總結特徵, 細胞核的3種特徵: 最小, 最大, 平均值,中間值等 (summary ((wbcd [(c ("radius_mean", "area_mean", "smoothness_mean"))]))) #=> radius_mean area_mean smoothness_mean Min. : 6.981 Min. : 143.5 Min. :0.05263 1st Qu.:11.700 1st Qu.: 420.3 1st Qu.:0.08637 Median :13.370 Median : 551.1 Median :0.09587 Mean :14.127 Mean : 654.9 Mean :0.09636 3rd Qu.:15.780 3rd Qu.: 782.7 3rd Qu.:0.10530 Max. :28.110 Max. :2501.0 Max. :0.16340 
min & max 標準化數值型數據,以便確保在標準的範圍內
# Min-max normalization: rescales x linearly into [0, 1] via
# (x - min(x)) / (max(x) - min(x)).
# NOTE(review): divides by zero when all values of x are equal; also, the two
# juxtaposed statements on this line need their own lines (or ';') to parse
# as intended in R.
((function (x) ('/' (('-' (x, (min (x)))), ('-' ((max (x)), (min (x))))))) -> normalize) (normalize ((c (10, 20, 30, 40, 50)))) #=> [1] 0.00 0.25 0.50 0.75 1.00
lapply表格數據每個數據單元都執行某個操做: 至關於map了,結果變成了list列表
# Apply `normalize` to every feature column (2:31) of wbcd; lapply returns a
# list, which as.data.frame reassembles into the normalized frame wbcd_n.
# NOTE(review): the as.data.frame call was swallowed into the comment when
# line breaks were lost.
(lapply ((wbcd [2:31]), normalize)) #=> $radius_mean [1] 0.52103744 0.64314449 0.60149557 0.21009040 0.62989256 0.25883856 ... $texture_mean [1] 0.02265810 0.27257355 0.39026040 0.36083869 0.15657761 0.20257017 ... ((as.data.frame ((lapply ((wbcd [2:31]), normalize)))) -> wbcd_n) #=> list列表(能夠不一樣類型): 從新變成data.frame radius_mean texture_mean perimeter_mean area_mean smoothness_mean 1 0.52103744 0.02265810 0.54598853 0.36373277 0.59375282 2 0.64314449 0.27257355 0.61578329 0.50159067 0.28987993 
一元線性迴歸
# Simple linear regression demo: y = x + N(0, 1) noise, fitted with
# lm(y ~ x); summary() reports coefficients, residuals and R-squared.
# NOTE(review): everything after the first '#=>' — including the lm() call
# itself — was swallowed into the comment when line breaks were lost; only
# (1:10 -> x) is live code here.
(1:10 -> x) #=> [1] 1 2 3 4 5 6 7 8 9 10 (('+' (x, (rnorm (10, 0, 1)))) -> y) #=> # [1] 0.4150231 1.9585418 1.7173466 3.2213521 4.0119051 4.8112887 5.7995432 # [8] 7.1943800 9.3619532 9.2997215 ((lm (y ~ x)) -> fit) #=> # Call: # lm(formula = y ~ x) # # Coefficients: # (Intercept) x # -0.8111 1.0164 (summary (fit)) # Call: # lm(formula = y ~ x) # # Residuals: # Min 1Q Median 3Q Max # 0.52077 -0.42176 -0.08944 0.14898 1.02546 # # Coefficients: # Estimate Std. Error t value Pr(>|t|) # (Intercept) -0.81107 0.38014 -2.134 0.0654 . # x 1.01640 0.06126 16.590 1.76e-07 *** # --- # Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 # # Residual standard error: 0.5565 on 8 degrees of freedom # Multiple R-squared: 0.9718, Adjusted R-squared: 0.9682 # F-statistic: 275.2 on 1 and 8 DF, p-value: 1.76e-07 #
knn
(library (class))

# k-NN classification (k=21): knn() returns a factor of predicted labels, one
# per test-set case; CrossTable then tabulates predictions against the true
# labels to evaluate the model.
# NOTE(review): the gmodels/CrossTable evaluation calls after the first '#'
# are commented out by the lost line breaks.
((knn (train=wbcd_train, test=wbcd_test, cl=wbcd_train_labels, k=21)) -> wbcd_test_pred) # knn返回wbcd_test_pred因子向量,爲測試數據集中的每個案例返回一個預測標籤 # 評估模型的性能 (library (gmodels)) (CrossTable (x=wbcd_test_labels, y=wbcd_test_pred, prop.chisq=FALSE)) 
bayes
 
str 查看dataframe特徵 & 類型 & 總數, 數據輪廓
# Show the structure of the credit data frame: dimensions, column types,
# factor levels and leading values (transcript on the following lines).
(str (credit))
#=> ## 'data.frame': 1000 obs. of 21 variables: ## $ checking_balance : Factor w/ 4 levels "< 0 DM","> 200 DM",..: 1 3 4 1 1 4 4 3 4 3 ... ## $ months_loan_duration: int 6 48 12 42 24 36 24 36 12 30 ... ## $ credit_history : Factor w/ 5 levels "critical","delayed",..: 1 5 1 5 2 5 5 5 5 1 ... ## $ purpose : Factor w/ 10 levels "business","car (new)",..: 8 8 5 6 2 5 6 3 8 2 ... ## $ amount : int 1169 5951 2096 7882 4870 9055 2835 6948 3059 5234 ... ## ... ... ## $ job : Factor w/ 4 levels "mangement self-employed",..: 2 2 4 2 2 4 2 1 4 1 ...
summary總結某列數據的Min/Max,Median,Mean等
# Five-number summary plus mean of the loan duration column (in months).
(summary (credit$months_loan_duration)) #=> Min. 1st Qu. Median Mean 3rd Qu. Max. 4.0 12.0 18.0 20.9 24.0 72.0
head看數據前幾個值,tail-log
# Peek at the first six values of the (shuffled) amount column.
(head (credit_rand$amount)) #=> [1] 2346 2030 1082 2631 3069 1333
評估模型的性能: gmodels/CrossTable
(library (gmodels))
# Confusion matrix for the k-NN predictions: per the transcript comments,
# 2 malignant cases out of 100 were misclassified as benign.
(CrossTable (x=wbcd_test_labels, y=wbcd_test_pred, prop.chisq=FALSE)) ## | wbcd_test_pred ## wbcd_test_labels | 良性腫塊 | 惡性腫塊 | Row Total | ## -----------------|-----------|-----------|-----------| ## 良性腫塊 | 77 | 0 | 77 | ## | 1.000 | 0.000 | 0.770 | ## | 0.975 | 0.000 | | ## | 0.770 | 0.000 | | ## -----------------|-----------|-----------|-----------| ## 惡性腫塊 | 2 | 21 | 23 | ## | 0.087 | 0.913 | 0.230 | ## | 0.025 | 1.000 | | ## | 0.020 | 0.210 | | ## -----------------|-----------|-----------|-----------| ## Column Total | 79 | 21 | 100 | ## | 0.790 | 0.210 | | ## -----------------|-----------|-----------|-----------|
c50決策樹
(library (C50))
# C5.0 decision tree: train on all features except column 17 (the target),
# predict on the test set, then cross-tabulate actual vs. predicted default.
# NOTE(review): three top-level statements were collapsed onto one line; in R,
# juxtaposed parenthesized expressions parse as a function application, so
# each statement must be restored to its own line (or separated by ';').
((C5.0 ((credit_train [-17]), credit_train$default)) -> credit_model) ((predict (credit_model, credit_test)) -> credit_pred) (CrossTable (credit_test$default, credit_pred, prop.chisq=FALSE, prop.c=FALSE, prop.r=FALSE, dnn=(c ('actual default', 'predicted default'))))
neuralnet
(library (neuralnet))
## Multilayer feed-forward network for numeric prediction: fit concrete
## strength from the mix ingredients, then use cor() to correlate predicted
## vs. actual strength on the test set.
## NOTE(review): collapsed onto one line; the leading '##' comments out the
## code.
## neuralnet函數用於數值預測的神經網絡: 多種原料=>強度預測, 用多層前饋神經網絡 ((neuralnet (strength ~ cement + slag + ash + water + superplastic + coarseagg + fineagg + age, data=concrete_train)) -> concrete_model) ## 預測強度 ((model_results$net.result) -> predicted_strength) ## cor用來獲取兩個數值向量之間的相關性 (cor (predicted_strength, concrete_test$strength)) ## [,1] ## [1,] 0.7195218932
svm
(library (kernlab))
## SVM letter classifier (linear "vanilladot" kernel): train, predict on the
## test letters, and compute the percentage of correct predictions (83.9% in
## the transcript).
## NOTE(review): collapsed onto one line; the leading '##' comments out the
## code.
## 字母分類器: 超平面分割面=>兩類數據空間化(填充,龔起來)=>分割完了再降維 ((ksvm (letter ~ ., data=letters_train, kernel="vanilladot")) -> letter_classifier) ## 評估模型的性能: 字母的預測 ((predict (letter_classifier, letters_test)) -> letter_predictions) ## 預測的值和真實的值進行比較=> (round (('*' ((prop.table (table ('==' (letter_predictions, letters_test$letter)))) ,100)), digits=1)) ## ==>> 正確率爲83.9% ## FALSE TRUE ## 16.1 83.9
kmeans
## k-means clustering: take 36 interest features (columns 5:40), z-score them
## with scale(), cluster into k=5 groups, then inspect cluster sizes and
## centroid coordinates.
## NOTE(review): collapsed onto one line; the leading '##' comments out the
## code.
## 只是取36個特徵: ((teens [5:40]) -> interests) ((as.data.frame (lapply (interests, scale))) -> interests_z) ## k均值聚類: ((kmeans (interests_z, 5)) -> teen_clusters) ## 看到分出來5類,各自的數量以下 (teen_clusters$size) # [1] 868 5089 2528 986 20529 # 份量teen_clusters$centers查看聚類質心的座標,全部的特徵 (teen_clusters$centers) 
R宏%>%
(library (magrittr))
# magrittr pipe: 1 is piped through (+100) and then print, yielding 101.
(1 %>% (function (x) ('+' (x, 100))) %>% (function (x) (print (x))) ) #=> [1] 101
  • 對於function裏面定義臨時變量,用pipe
(library (tm))
# Text-mining pipeline via magrittr: wrap the input text in a Corpus,
# lower-case it, strip punctuation, numbers and stopwords, build a
# term-document matrix, and return term frequencies sorted descending.
# NOTE(review): several top-level statements were collapsed onto this one
# line; each needs its own line (or ';') to parse as intended in R.
(library (magrittr)) ((function (text) (text %>% (function (st) (Corpus ((VectorSource (st))))) %>% (function (cor) (tm_map (cor, (content_transformer (tolower))))) %>% (function (cor) (tm_map (cor, removePunctuation))) %>% (function (cor) (tm_map (cor, removeNumbers))) %>% (function (cor) (tm_map (cor, removeWords, (c (stopwords("SMART"), "thy", "thou", "thee", "the", "and", "but"))))) %>% (function (cor) (TermDocumentMatrix (cor, control=(list (minWordLength=1))))) %>% (function (mydtm) (as.matrix (mydtm))) %>% (function (m) (sort ((rowSums (m)), decreasing=TRUE))) )) -> getTermMatrix) (getTermMatrix ("The Clojure Programming Language. Clojure is a dynamic, general-purpose programming")) #=> ## clojure programming dynamic generalpurpose language ## 2 2 1 1 1 
regression
## Multiple linear regression on the insurance data: explore the correlation
## matrix of four numeric features, visualize pairwise relationships (pairs /
## psych::pairs.panels), fit lm(charges ~ ...), and evaluate with summary().
## NOTE(review): collapsed onto one line; the leading '##' comments out the
## code.
## 3.1 探索特徵之間的關係---相關係數矩陣 (cor (insurance [(c ("age", "bmi", "children", "charges"))])) ## age bmi children charges ## age 1.0000000 0.1092719 0.04246900 0.29900819 ## bmi 0.1092719 1.0000000 0.01275890 0.19834097 ## children 0.0424690 0.0127589 1.00000000 0.06799823 ## charges 0.2990082 0.1983410 0.06799823 1.00000000 ## 3.2 可視化特徵之間的關係------散點圖矩陣 ## (pairs (insurance [(c ("age", "bmi", "children", "charges"))])) #=> pairs_insurance.png (library (psych)) ## pairs.panels能夠顯示擬合的線 ## (pairs.panels (insurance [(c ("age", "bmi", "children", "charges"))])) #=> pairs_panels_insurance.png ## 3.3 基於數據訓練模型 -------------- ((lm (charges ~ age + children + bmi + sex + smoker + region, data=insurance)) -> ins_model) ## Call: ## lm(formula = charges ~ age + children + bmi + sex + smoker + ## region, data = insurance) ## ## Coefficients: ## (Intercept) age children bmi ## -11938.5 256.9 475.5 339.2 ## sexmale smokeryes regionnorthwest regionsoutheast ## -131.3 23848.5 -353.0 -1035.0 ## regionsouthwest ## -960.1 ## ## 3.4 評估模型的性能 (summary (ins_model))
特徵選擇Boruta
(library (Boruta))
# Boruta feature selection: fit Boruta on the training data (dropping column
# 348), plot the importance ranking to a PNG, then keep the selected columns
# via dplyr::select and save them to an .rda file.
# NOTE(review): multiple top-level statements were collapsed onto one line;
# each needs its own line (or ';') to parse as intended in R.
((Boruta (Classes~., data=(train [,-348]))) -> Boruta.mod) (png ("Boruta_selection.png", width=4000,height=1600)) (plot (Boruta.mod, las="2")) (dev.off ()) ## 將選出來的重要特徵保存到一個rda裏面 (library (magrittr)) (library (dplyr)) #select函數 (train %>% (function (data) (select (data, zakończyć,zdjęcie,należeć,naprawdę,polski,kobieta,sierpień,zobaczyć,dotyczyć,szczęście,mężczyzna,europejski))) -> train_Boruta) (save (train_Boruta, file="train_Boruta.rda"))
分佈語義模型wordspace
(library (wordspace))
# Distributional semantics with wordspace: build a DSM from written-mode
# verb-noun triples, reduce it to 300 dimensions with randomized SVD
# (dsm.projection), evaluate against the RG65 similarity ratings, compute a
# cosine distance matrix for neighbour terms, and project neighbours to 2-D
# with MASS::isoMDS for plotting.
# NOTE(review): multiple statements were collapsed per line and the isoMDS
# call is split across the two lines below; the example needs reflowing onto
# one statement per line before it can run.
((subset (DSM_VerbNounTriples_BNC, mode=="written")) -> Triples) (str (Triples)) ## 'data.frame': 236043 obs. of 5 variables: ## $ noun: chr "aa" "aa" "aa" "abandonment" ... ## $ rel : chr "subj" "subj" "subj" "subj" ... ## $ verb: chr "be" "have" "say" "be" ... ## $ f : num 7 5 12 14 45 13 6 23 5 7 ... ## $ mode: Factor w/ 2 levels "spoken","written": 2 2 2 2 2 2 2 2 2 2 ... ## ((dsm (target=Triples$noun, feature=Triples$verb, score=Triples$f, raw.freq=TRUE, sort=TRUE)) -> VObj) ## Distributional Semantic Model with 10940 rows x 3149 columns ## * raw co-occurrence matrix M available ## - sparse matrix with 199.2k / 34.5M nonzero entries (fill rate = 0.58%) ## - in canonical format ## - known to be non-negative ## - sample size of underlying corpus: 5010.1k tokens ## ((dsm.projection (VObj, method="rsvd", n=300, oversampling=4)) -> VObj300) ## rsvd1 rsvd2 rsvd3 rsvd4 ## aa -0.401869162 -5.715081e-01 -8.639994e-04 5.568812e-02 ## abandonment -0.164028349 7.388197e-02 -7.621238e-02 -6.218919e-02 ## abbey -0.501348714 -1.529309e-01 1.568900e-01 -3.602057e-02 ## abbot -0.556840969 -3.608457e-01 4.297981e-02 -1.349133e-02 ## ability -0.136815703 9.964295e-02 -1.508660e-01 1.483469e-01 ## abnormality -0.322822789 9.148822e-02 2.325598e-02 5.194156e-02 ## correlation 相關性 with RG65 ratings =>>>> ## distributional model 分佈模型 => distributional semantic model: 分佈語義模型 (plot (eval.similarity.correlation (RG65, VObj300, format="HW", details=TRUE))) #=> similarity_correlation.png ((dist.matrix (VObj300, terms=nn.terms, method="cosine")) -> nn.dist) ## book paper article poem works magazine novel ## book 0.00000 45.07368 51.91946 53.48004 53.91710 53.94898 54.40499 ## paper 45.07368 0.00000 49.58058 59.41401 63.39080 58.39195 59.63905 ## article 51.91946 49.58058 0.00000 50.85024 63.56611 63.34272 56.34124 ## poem 53.48004 59.41401 50.85024 0.00000 66.11456 64.23977 39.68612 ## works 53.91710 63.39080 63.56611 66.11456 0.00000 64.21008 65.37230 (library (MASS)) ((isoMDS (nn.dist, 
p=2)) -> mds) ## initial value 31.571861 ## iter 5 value 27.057916 ## final value 23.256689 ## converged ## $points ## [,1] [,2] ## book -2.887478 -1.8723470 ## paper -11.206053 -6.0067030 ## plot是畫板加畫圖 (plot (mds$points, pch=20, col="red")) ## 更新圖畫的內容 (text (mds$points, labels=nn.terms, pos=3)) #=>> neighbourhood_graph_for_book.png
特徵選擇Caret

importance繪圖git

(library (caret))
# caret feature importance: 10-fold cross-validation repeated 3 times
# (trainControl), an rpart model trained on scaled churn data, varImp() to
# rank predictors, and plot(importance) to chart them.
# NOTE(review): multiple top-level statements were collapsed onto one line;
# each needs its own line (or ';') to parse as intended in R.
(library (rpart)) (library (e1071)) ((trainControl (method="repeatedcv", number=10,repeats=3)) -> control) ((train (churn~., data=trainset, method="rpart",preProcess="scale", trControl=control)) -> model) ## 2315 samples ## 16 predictor ## 2 classes: 'yes', 'no' ## Pre-processing: scaled (16) ## Resampling: Cross-Validated (10 fold, repeated 3 times) ## Summary of sample sizes: 2084, 2084, 2083, 2083, 2082, 2084, ... ## Resampling results across tuning parameters: ## cp Accuracy Kappa ## 0.05555556 0.8995112 0.5174059 ## 0.07456140 0.8593389 0.2124126 ## 0.07602339 0.8567440 0.1898221 ## Accuracy was used to select the optimal model using the largest value. ## The final value used for the model was cp = 0.05555556. ## ((varImp (model, scale=FALSE)) -> importance) ## rpart variable importance ## Overall ## number_customer_service_calls 116.015 ## total_day_minutes 106.988 ## total_day_charge 100.648 ## ... (plot (importance)) ##=> fs_churn_importance_by_caret.png 
特徵篩選FSelector
(library (FSelector))
## FSelector: rank attributes by random-forest importance, then keep the top
## five with cutoff.k (transcript lists the selected names).
## NOTE(review): collapsed onto one line; the leading '##' comments out the
## code.
## 計算每一個屬性的權值 ((random.forest.importance (churn~., trainset, importance.type=1)) -> weights) ## 獲取權重最高的5個屬性 ((cutoff.k (weights, 5)) -> subset) ## [1] "number_customer_service_calls" "international_plan" ## [3] "total_day_charge" "total_day_minutes" ## [5] "total_intl_calls"
直方圖hist
# Histogram of the insurance charges column (output saved as the referenced
# PNG).
(hist (insurance$charges)) #==>> charges_hist.png
散點圖pairs
# Scatter-plot matrix of four insurance columns (age, bmi, children, charges).
(pairs (insurance [(c ("age", "bmi", "children", "charges"))])) #=> pairs_insurance.png
bmp降維svd
(library (bmp))
# Image compression via SVD: read a BMP into a numeric matrix, take the SVD of
# the scaled matrix, find how many singular vectors explain 90% of the
# variance (27 in the transcript), then reconstruct the image from u, d, v
# truncated to that rank (lenna_compression).
# NOTE(review): collapsed onto one line; the leading '#' comments out all of
# the code.
# 將圖片導入爲數值矩陣 ((read.bmp ("lena512.bmp")) -> lenna) ## 進行SVD操做,保存到新的變量lenna.svd, 繪製方差的百分比圖 ((svd (scale (lenna))) -> lenna.svd) (plot (('/' (lenna.svd$d^2, (sum (lenna.svd$d^2)))), type="l", xlab=" Singular vector", ylab = "Variance explained")) #=> variance_percentage.png ## 找到能解釋90%以上變量的奇異向量數據點: 90%類似度須要27個奇異向量才能達到 (min (which ('>' ((cumsum ('/' (lenna.svd$d^2, (sum (lenna.svd$d^2))))), 0.9)))) ##=> [1] 27 ## 矩陣相乘, u v d ((function (dim, u=(as.matrix (lenna.svd$u[, 1:dim])), v=(as.matrix (lenna.svd$v[, 1:dim])), d=(as.matrix ((diag (lenna.svd$d)) [1:dim, 1:dim]))) (image ('%*%' (('%*%' (u, d)), (t (v)))) ) ) -> lenna_compression) (lenna_compression (27))
相關文章
相關標籤/搜索