存儲爲3個文件夾:大盤數據、股票數據與歸納文件
數據獲取來源:股票數據來源於R中的WindR;大盤數據來源於Python中的tushare(Wind中指數似乎只有中證板塊)
設置數據文件夾,代碼中只需修改更新截止日期與工作路徑,後面建立文件都無需手動操作
文件夾中文件不能隨意變動,標紅代碼部分需要自己修改
股票數據中變量分別爲股票代碼、股票簡稱、日期、時間、開高低收(乘10000)、交易量、交易額、換手率(基於自由流通股本)、自由流通股本
## Initial setup
# Downloads daily history for five market indices via tushare and writes one
# CSV per index, plus a small stock_info.csv summary, under
# "ts大盤數據/大盤數據<YYYYMMDD>/" inside the working directory.
import os

import pandas as pd
import tushare as ts

# Only the working directory and end_date need to be edited between runs.
os.chdir("C:/Users/Administrator.USER-20161208UW/Desktop/數據")
start_date = "2009-01-01"
end_date = "2017-12-20"
new_date = end_date.replace("-", "")  # "YYYYMMDD", used in folder names

## Create the output folders
# makedirs(..., exist_ok=True) creates the parent folder too and does not
# fail when the script is re-run for a date that already has a folder.
out_dir = "ts大盤數據/大盤數據" + new_date
os.makedirs(out_dir, exist_ok=True)

## Summary information
stock_info = pd.DataFrame({
    "stock.code": ["399001", "399005", "399006", "399300", "999999"],
    "stock.name": ["深證成指", "中小板指", "創業板指", "滬深300", "上證指數"],
    "type": ["SZ", "SZ", "SZ", "SZ", "SH"],
})
stock_info.to_csv(out_dir + "/stock_info.csv", index=False)

# Fetch the index data
# dapanzhishu1: codes passed to tushare; dapanzhishu2: codes used for the
# output file names (the last entry differs between the two lists).
dapanzhishu1 = ["399001", "399005", "399006", "399300", "000001"]
dapanzhishu2 = ["399001", "399005", "399006", "399300", "999999"]
# Every entry now uses the "<code>.<exchange>" form; previously only the
# first entry had the dot separator.
wind_code = ["399001.SZ", "399005.SZ", "399006.SZ", "399300.SZ", "999999.SH"]
name = ["深證成指", "中小板指", "創業板指", "滬深300", "上證指數"]

price_cols = ["open", "high", "close", "low"]
output_cols = ["wind_code", "name", "date", "time", "open", "high", "low",
               "close", "volumw", "turover", "turn", "free_turn"]

for query_code, file_code, full_code, index_name in zip(
        dapanzhishu1, dapanzhishu2, wind_code, name):
    df = ts.get_h_data(query_code, start=start_date, end=end_date,
                       index=True, pause=4)
    # Skip this index instead of crashing if the download returned nothing.
    if df is None or df.empty:
        print(query_code + " 獲取數據出錯(未返回數據)")
        continue

    df.sort_index(inplace=True)  # oldest row first
    df[price_cols] = df[price_cols] * 10000  # prices are stored times 10000

    # Turn the date index into a "YYYYMMDD" string column.
    df["date"] = df.index
    df["date"] = df["date"].astype(str).apply(lambda x: x.replace('-', ''))

    # Positional rename: assumes the frame now has exactly these seven columns
    # in this order (six from tushare plus "date") - TODO confirm against the
    # installed tushare version. The "volumw"/"turover" spellings are kept
    # because the stock CSVs written by the R script use the same headers.
    df.columns = ['open', 'high', 'close', 'low', 'volumw', 'turover', 'date']

    df["name"] = index_name
    df["wind_code"] = full_code
    df["time"] = 151500000  # constant time stamp written for every row
    df["turn"] = 0          # placeholder value written for every row
    df["free_turn"] = 0     # placeholder value written for every row

    df = df[output_cols]
    df.to_csv(out_dir + "/" + file_code + ".csv", index=False)
## R
# Downloads daily data for every stock in the Wind sector queried below,
# writes one CSV per stock under "股票數據/股票數據<YYYYMMDD>/", and writes a
# summary workbook (code, name, exchange, first/last date) under "歸納文件/".
library(WindR)
library(xlsx)
library(data.table)
library(magrittr)
library(tcltk2)

setwd("C:/Users/~~~/Desktop/數據")
new.date <- "20171220"
start.date <- "20090101"
w.start()

# Convert "YYYYMMDD" into the "YYYY-MM-DD" form passed to the Wind calls.
end.date <- paste(substr(new.date, 1, 4), substr(new.date, 5, 6),
                  substr(new.date, 7, 8), sep = "-")
start.date <- paste(substr(start.date, 1, 4), substr(start.date, 5, 6),
                    substr(start.date, 7, 8), sep = "-")

## Codes of the stocks in the sector as of end.date
stock.code.df <- w.wset("sectorconstituent",
                        paste0("date=", end.date,
                               ";sectorid=a001010100000000"))
# Abort here: without the constituent list nothing below can run.
if (stock.code.df$ErrorCode != 0) {
  stop(paste0("獲取數據出錯,錯誤代碼", stock.code.df$ErrorCode), call. = FALSE)
}
stock.code.sh.sz <- stock.code.df$Data$wind_code
new.stock.code <- substr(stock.code.sh.sz, 1, 6)

## Create the output folders (no warning when they already exist)
dir.create("歸納文件", showWarnings = FALSE)
dir.create("股票數據", showWarnings = FALSE)
dir.create(paste0("股票數據/股票數據", new.date), showWarnings = FALSE)

## Summary table: one row per stock, filled in only for stocks that pass
## every check below; rows left as NA block the final export.
general.information <- data.frame(array(dim = c(length(stock.code.sh.sz), 5)))
colnames(general.information) <- c("stock.code", "stock.name", "type",
                                   "starttime", "endtime")

# Returns the message describing the first failed check on a stock's data
# frame, or NULL when the frame passes all checks.
check.wind.df <- function(wind.df) {
  if (any(is.na(wind.df))) {
    return("數據出錯(數據中仍有NA)")
  }
  if (any(wind.df[, 5:8] == 0)) {
    return("數據出錯(數據中開高低收存在0)")
  }
  if (anyDuplicated(wind.df$date) > 0) {
    return("數據出錯(數據中存在日期相同)")
  }
  if (any(wind.df$date != sort(wind.df$date))) {
    return("數據出錯(數據中日期順序不對)")
  }
  NULL
}

## Progress bar
pb <- tkProgressBar("進度", "已完成 %", 0, 100)

## Fetch the data from WindR
## Not checked: dates that are not trading days in the index data.
stock.count <- length(stock.code.sh.sz)
for (i in seq_along(stock.code.sh.sz)) {
  current.code <- stock.code.sh.sz[i]
  wind.data <- w.wsd(current.code,
                     "trade_code, sec_name, open, high, low, close, volume, amt, free_turn,free_float_shares",
                     start.date, end.date, "unit=1;PriceAdj=F")

  # A failed request stops the whole download loop.
  if (wind.data$ErrorCode != 0) {
    print(paste0(current.code, "股票獲取數據出錯,錯誤代碼:", wind.data$ErrorCode))
    break
  }

  wind.df <- data.frame(array(dim = c(nrow(wind.data$Data), 12)))
  colnames(wind.df) <- c("wind_code", "name", "date", "time", "open", "high",
                         "low", "close", "volumw", "turover", "free_turn",
                         "free_float_shares")
  wind.df[, 1] <- wind.data$Code
  wind.df[, 2] <- wind.data$Data$SEC_NAME
  wind.df[, 3] <- gsub("-", "", wind.data$Data$DATETIME)
  wind.df[, 4] <- 151500000
  # Columns 4:7 of the result are copied into open/high/low/close (times
  # 10000) and columns 8:11 into the remaining four columns.
  wind.df[, 5:8] <- wind.data$Data[4:7] * 10000
  wind.df[, 9:12] <- wind.data$Data[8:11]

  # Drop rows without an open price.
  wind.df <- wind.df[!is.na(wind.df$open), ]

  if (nrow(wind.df) == 0) {
    # Previously referenced the undefined object `stock.code`.
    print(c(i, current.code))
  } else {
    problem <- check.wind.df(wind.df)
    if (!is.null(problem)) {
      print(paste(current.code, problem))
    } else {
      general.information[i, 1] <- substr(current.code, 1, 6)
      general.information[i, 2] <- wind.df$name[1]
      general.information[i, 3] <- substr(current.code, 8, 9)
      general.information[i, 4] <- wind.df[1, 3]
      general.information[i, 5] <- wind.df[nrow(wind.df), 3]
      write.csv(wind.df,
                paste0("股票數據/股票數據", new.date, "/",
                       new.stock.code[i], ".csv"),
                row.names = FALSE)
    }
  }

  # Progress is updated for every processed stock, valid or not.
  percent.done <- i * 100 / stock.count
  info <- sprintf("已完成 %d%%", round(percent.done))
  setTkProgressBar(pb, percent.done, sprintf("進度 (%s)", info), info)
}

## Close the progress bar
close(pb)

## Export the summary only when it contains no NA
if (!any(is.na(general.information))) {
  write.xlsx(general.information,
             paste0("歸納文件/歸納文件", new.date, ".xlsx"),
             row.names = FALSE)
} else {
  print("總概況文件中存在NA,需查驗")
}
(1)一級
(2)二級
(3)三級