更多大數據分析、建模等內容請關注gongzhonghao《bigdatamodeling》函數
將代碼封裝在函數PlotKS_N裏,Pred_Var是預測結果,能夠是評分或機率形式;labels_Var是好壞標籤,取值爲1或0,1表明壞客戶,0表明好客戶;descending用於控制數據按違約機率降序排列,若是Pred_Var是評分,則descending=0,若是Pred_Var是機率形式,則descending=1;N表示在將數據按風險降序排列後,等分N份後計算KS值。大數據
PlotKS_N函數返回的結果爲一列表,列表中的元素依次爲KS最大值、KS取最大值的人數百分位置、KS曲線對象、KS數據框。rest
代碼以下:code
1 #################### PlotKS_N ################################ 2 PlotKS_N<-function(Pred_Var, labels_Var, descending, N){ 3 # Pred_Var is prop: descending=1 4 # Pred_Var is score: descending=0 5 library(dplyr) 6 7 df<- data.frame(Pred=Pred_Var, labels=labels_Var) 8 9 if (descending==1){ 10 df1<-arrange(df, desc(Pred), labels) 11 }else if (descending==0){ 12 df1<-arrange(df, Pred, labels) 13 } 14 15 df1$good1<-ifelse(df1$labels==0,1,0) 16 df1$bad1<-ifelse(df1$labels==1,1,0) 17 df1$cum_good1<-cumsum(df1$good1) 18 df1$cum_bad1<-cumsum(df1$bad1) 19 df1$rate_good1<-df1$cum_good1/sum(df1$good1) 20 df1$rate_bad1<-df1$cum_bad1/sum(df1$bad1) 21 22 if (descending==1){ 23 df2<-arrange(df, desc(Pred), desc(labels)) 24 }else if (descending==0){ 25 df2<-arrange(df, Pred, desc(labels)) 26 } 27 28 df2$good2<-ifelse(df2$labels==0,1,0) 29 df2$bad2<-ifelse(df2$labels==1,1,0) 30 df2$cum_good2<-cumsum(df2$good2) 31 df2$cum_bad2<-cumsum(df2$bad2) 32 df2$rate_good2<-df2$cum_good2/sum(df2$good2) 33 df2$rate_bad2<-df2$cum_bad2/sum(df2$bad2) 34 35 rate_good<-(df1$rate_good1+df2$rate_good2)/2 36 rate_bad<-(df1$rate_bad1+df2$rate_bad2)/2 37 df_ks<-data.frame(rate_good,rate_bad) 38 39 df_ks$KS<-df_ks$rate_bad-df_ks$rate_good 40 41 L<- nrow(df_ks) 42 if (N>L) N<- L 43 df_ks$tile<- 1:L 44 qus<- quantile(1:L, probs = seq(0,1, 1/N))[-1] 45 qus<- ceiling(qus) 46 df_ks<- df_ks[df_ks$tile%in%qus,] 47 df_ks$tile<- df_ks$tile/L 48 df_0<-data.frame(rate_good=0,rate_bad=0,KS=0,tile=0) 49 df_ks<-rbind(df_0, df_ks) 50 51 M_KS<-max(df_ks$KS) 52 Pop<-df_ks$tile[which(df_ks$KS==M_KS)] 53 M_good<-df_ks$rate_good[which(df_ks$KS==M_KS)] 54 M_bad<-df_ks$rate_bad[which(df_ks$KS==M_KS)] 55 56 library(ggplot2) 57 PlotKS<-ggplot(df_ks)+ 58 geom_line(aes(tile,rate_bad),colour="red2",size=1.2)+ 59 geom_line(aes(tile,rate_good),colour="blue3",size=1.2)+ 60 geom_line(aes(tile,KS),colour="forestgreen",size=1.2)+ 61 62 geom_vline(xintercept=Pop,linetype=2,colour="gray",size=0.6)+ 63 geom_hline(yintercept=M_KS,linetype=2,colour="forestgreen",size=0.6)+ 64 geom_hline(yintercept=M_good,linetype=2,colour="blue3",size=0.6)+ 65 geom_hline(yintercept=M_bad,linetype=2,colour="red2",size=0.6)+ 66 67 annotate("text", x = 0.5, y = 1.05, label=paste("KS=", round(M_KS, 4), "at Pop=", round(Pop, 4)), size=4, alpha=0.8)+ 68 69 scale_x_continuous(breaks=seq(0,1,.2))+ 70 scale_y_continuous(breaks=seq(0,1,.2))+ 71 72 xlab("of Total Population")+ 73 ylab("of Total Bad/Good")+ 74 75 ggtitle(label="KS - Chart")+ 76 77 theme_bw()+ 78 79 theme( 80 plot.title=element_text(colour="gray24",size=12,face="bold"), 81 plot.background = element_rect(fill = "gray90"), 82 axis.title=element_text(size=10), 83 axis.text=element_text(colour="gray35") 84 ) 85 86 result<-list(M_KS=M_KS,Pop=Pop,PlotKS=PlotKS,df_ks=df_ks) 87 return(result) 88 }
做圖以下:對象