test

Lending Club 是全球最大的 P2P 借貸平臺。 
本文基於 R 語言對其貸款數據作簡單分析。

  1. rm(list=ls()) #清除變量
  2. gc() #釋放內存
  • step1 
    考慮到後續分析的需要, 
    先用 SSIS 將數據導入 SQL Server, 
    如圖:

 
 **此處有坑**

  • step2 
    連接 SQL Server,並將數據讀入 R。
  1. plot
    
    lend_club4<- lend_club3%>%
      group_by(m,y)%>%
      summarise(total_m=sum(sumamount))
    
    lend_club4
    head(lend_club4)
    Source: local data frame [6 x 3]
    Groups: m [2]
    
          m     y   total_m
      (chr) (chr)     (dbl)
    1    01  2008  32256329
    2    01  2009  28523635
    3    01  2010  63082946
    4    01  2011 171186425
    5    01  2012 297667575
    6    02  2008  20596688
    

      

      

  • step3
  1. library(ggplot2)
  2. qplot(date_1,sumamount,data=lend_club1,geom="line") # 每日貸款金額的時序圖

  1. p<-qplot(month_day,sumamount,data=lend_club1)
  2. p+facet_wrap(~year) #2007-2012 期間每日的貸款金額

  1. library(tidyr)
  2. library(dplyr)
  3. lend_club2<-separate(lend_club1,date_1,c("y","m","d"),sep="-")
  4. head(lend_club2)
  5. sumamount y m d year month_day
  6. 1 2000 2007 05 26 2007 05/26
  7. 2 47400 2007 05 27 2007 05/27
  8. 3 23900 2007 05 28 2007 05/28
  9. 4 121050 2007 05 29 2007 05/29
  10. 5 87500 2007 05 30 2007 05/30
  11. 6 46500 2007 05 31 2007 05/31
  1. lend_club3<-unite(lend_club2,"y_m",y,m,sep="-",remove = F)
  2. head(lend_club3)
  3. sumamount y_m y m d year month_day
  4. 1 2000 2007-05 2007 05 26 2007 05/26
  5. 2 47400 2007-05 2007 05 27 2007 05/27
  6. 3 23900 2007-05 2007 05 28 2007 05/28
  7. 4 121050 2007-05 2007 05 29 2007 05/29
  8. 5 87500 2007-05 2007 05 30 2007 05/30
  9. 6 46500 2007-05 2007 05 31 2007 05/31
  10. qplot(m,sumamount,data=lend_club3,geom=c("boxplot"))+facet_wrap(~year) #2007~2012年每個月貸款金額的箱線圖

  1. lend_club4<- lend_club3%>%
  2. group_by(m,y)%>%
  3. summarise(total_m=sum(sumamount))
  4. lend_club4
  5. head(lend_club4)
  6. Source: local data frame [6 x 3]
  7. Groups: m [2]
  8. m y total_m
  9. (chr) (chr) (dbl)
  10. 1 01 2008 32256329
  11. 2 01 2009 28523635
  12. 3 01 2010 63082946
  13. 4 01 2011 171186425
  14. 5 01 2012 297667575
  15. 6 02 2008 20596688
  1. 折線圖 分面
  2. p<-qplot(m,total_m,data=lend_club4)+geom_smooth(aes(group=y,colour=y),method = "lm")

折線圖 分面

  1. p<-qplot(m,total_m,data=lend_club4)+geom_smooth(aes(group=y,colour=y))

  1. p+facet_wrap(~y)

  1. lend<-read.csv("C:\\Users\\liyi\\Desktop\\lend_club.csv")
  2. lend1<-read.csv("C:\\Users\\liyi\\Desktop\\lend_club.csv",header = F)
  3. lend1<-lend1[-1,]
  4. head(lend1)
  5. lend1<-lend1[,c(1,3,9)]
  6. myvar<-c("amount","year","employment")
  7. names(lend1)<-myvar
  8. head(lend1)
  9. str(lend1)
  10. lend1$amountnew<-as.numeric(as.character(lend1$amount))
  11. library(sqldf)
  12. lend2<-sqldf('select sum(V1),V3,V9
  13. from lend1
  14. group by V3,V9')
  15. q<-qplot(employment,amountnew,data = lend1,geom=c("boxplot"),colour=lend1$employment)+facet_wrap(~year)
  16. q<- q+theme(axis.text.x=element_text(angle=90,hjust=1,colour="black"),legend.position='none')
  17. q<- q+scale_y_continuous(limits = c(0, 100000))
  18. q

相關文章
相關標籤/搜索
本站公眾號
   歡迎關注本站公眾號,獲取更多信息