數據來源網站:http://finance.ifeng.com/app/hq/stock/<股票代碼>/
通過「審查元素」選項查看每個變量的 HTML 結構:
library(openxlsx)

# Workbook containing the stock codes; it has no header row, and the codes
# are taken from its first column.
stockcode <- read.xlsx(
  "C:/Users/steph/Desktop/stock/stockcode.xlsx",
  sheet = 1,
  colNames = FALSE
)
codes <- as.vector(stockcode[[1]])  # plain vector of stock codes

# One result slot per stock code. The slots start as numeric NA (rather than
# the logical FALSE produced by vector(length = ...)) so that a stock whose
# value is never filled in shows up as missing instead of as FALSE / 0.
n_codes <- length(codes)
EPS   <- rep(NA_real_, n_codes)  # earnings per share
NAV   <- rep(NA_real_, n_codes)  # net assets per share
ROE   <- rep(NA_real_, n_codes)  # return on equity
ND    <- rep(NA_real_, n_codes)  # undistributed profit per share
CR    <- rep(NA_real_, n_codes)  # capital reserve per share
OCFPS <- rep(NA_real_, n_codes)  # operating cash flow per share
NP    <- rep(NA_real_, n_codes)  # net profit (unit: 10,000 yuan)
OI    <- rep(NA_real_, n_codes)  # operating revenue (unit: 10,000 yuan)
ROI   <- rep(NA_real_, n_codes)  # investment income (unit: 10,000 yuan)
FE    <- rep(NA_real_, n_codes)  # finance expenses (unit: 10,000 yuan)
library(rvest)

# Download the quote page for one stock code and return its first `n_fields`
# indicator values as a numeric vector of length `n_fields`.
#
# The page is fetched once. Indicator k is read from the second <td> inside
# the k-th <li> found under ".tabInfor06". Anything that cannot be obtained
# (download failure, missing row, missing cell) is returned as NA_real_, so
# the result is always numeric and always has length `n_fields`.
fetch_indicators <- function(code, n_fields = 10L) {
  url <- paste0("http://finance.ifeng.com/app/hq/stock/", code, "/")

  rows <- tryCatch(
    read_html(url) %>%
      html_nodes(".tabInfor06") %>%
      html_nodes("li"),
    error = function(e) {
      # Report the failure instead of hiding it, then fall back to all-NA.
      warning("Could not read ", url, ": ", conditionMessage(e), call. = FALSE)
      NULL
    }
  )
  if (is.null(rows)) {
    return(rep(NA_real_, n_fields))
  }

  vapply(
    seq_len(n_fields),
    function(k) {
      if (k > length(rows)) {
        return(NA_real_)
      }
      cells <- html_nodes(rows[k], "td")
      # Without this guard a missing cell would yield numeric(0), and the
      # later `EPS[n] <- ...` assignment would fail with "replacement has
      # length zero".
      if (length(cells) < 2L) {
        return(NA_real_)
      }
      # Text that is not a number becomes NA (with a warning) via as.numeric().
      tryCatch(
        as.numeric(html_text(cells[2])),
        error = function(e) NA_real_
      )
    },
    numeric(1)
  )
}

# Fill the pre-allocated result vectors, one stock code at a time.
# seq_along() keeps the loop from running when `codes` is empty.
for (n in seq_along(codes)) {
  values <- fetch_indicators(codes[n])
  EPS[n]   <- values[1]
  NAV[n]   <- values[2]
  ROE[n]   <- values[3]
  ND[n]    <- values[4]
  CR[n]    <- values[5]
  OCFPS[n] <- values[6]
  NP[n]    <- values[7]
  OI[n]    <- values[8]
  ROI[n]   <- values[9]
  FE[n]    <- values[10]
}
# Collect the scraped indicators into a data frame, one row per stock code.
FS <- data.frame(EPS, NAV, ROE, ND, CR, OCFPS, NP, OI, ROI, FE)

# Import the remaining company information (workbook has no header row).
companydetail <- read.xlsx(
  "C:/Users/steph/Desktop/stock/companydetail.xlsx",
  sheet = 1,
  colNames = FALSE
)

# The 15 column names below are 5 company columns followed by the 10
# indicator columns, and cbind() pairs rows by position. Fail early if the
# workbook does not have that shape instead of producing mislabelled data.
# NOTE(review): this also assumes companydetail.xlsx lists the stocks in the
# same order as stockcode.xlsx - confirm.
stopifnot(
  nrow(companydetail) == nrow(FS),
  ncol(companydetail) == 5L
)

# Merge the two data frames column-wise.
companyfinacedetail <- cbind(companydetail, FS)

# Replace the variable names with descriptive labels.
names(companyfinacedetail) <- c(
  "股票代碼", "公司名稱", "所屬行業", "總市值(萬元)", "流通市值(萬元)",
  "每股收益", "每股淨資產", "淨資產收益率%", "每股未分配利潤", "每股資本公積",
  "每股經營現金流", "淨利潤(萬元)", "營業收入(萬元)", "投資收益(萬元)",
  "財務費用(萬元)"
)

# Show the financial information of the first 10 companies.
head(companyfinacedetail, n = 10)
結果如下:
(End)