研究 -- 不少時候,就是證僞

有個想法:
用期貨交易數據中連續 10 個 tick(下文寫作 tik)的信息,去檢驗它們與 3 分鐘後的價格波動是否有關係;或者說,檢驗神經網絡能否識別出這種關係。

結果:
花了兩天時間,寫代碼,提取數據, 寫網絡模型, 訓練。
證實結果:
1. 網絡模型不能有效識別 10 個 tik 中的潛在特徵並做出有效判斷(驗證集準確率約 0.51,與隨機猜測相當)。
2. 或者說,3 分鐘後的結果與這 10 個 tik 中的特徵是無關的。


代碼(數據提取):

# encoding: UTF-8

import copy
import os
import sys
import time

import numpy as np
import pandas as pd

# Sample single-file input path.
file = '/home/hylas/dev/data/ru05_20171208.csv'

# file ='d:/test/ru09_20171219.csv'

# Purpose of this program: generate the tik10x3miny X, y data used for
# network prediction. X, y are extracted from each day's data and saved to
# an .h5 file.
# Two data sets are described:
#   * 5 ticks as X, with the `dim` 3 minutes later as Y
#     (dim >= 10 -> 0, dim <= -10 -> 1, everything else filtered out);
#   * 10 ticks as X, with the same Y definition.
# NOTE(review): the generator code in this file stores y / abs(y), i.e. +1 / -1,
# rather than the 0 / 1 stated above; the training script remaps -1 -> 0 and
# 1 -> 1 when loading.
#
# tik layout:
#   [index, price, vol, amount, bidprice, bvol, askprice, askvol,
#    openvol, closevol, type1, type2]
# X: tik x5 or tik x10
# Y: dim >= 10 -> 0, dim <= -10 -> 1, everything else filtered out


class tik10x3min():
    """Turn futures tick CSV files into (X, y) samples stored as .h5 files.

    Each tick is a 12-element list:
        0 index, 1 price, 2 vol, 3 amount, 4 bidprice, 5 bvol,
        6 askprice, 7 askvol, 8 openvol, 9 closevol, 10 type1, 11 type2

    One X sample is the ``WINDOW_TICKS`` ticks preceding tick ``i``, with the
    price of tick ``i`` subtracted from the price/bid/ask fields, flattened to
    a single row.  The label is the sign (+1 / -1) of the scaled price change
    between tick ``i`` and the tick ``HORIZON_TICKS`` rows later; samples whose
    scaled change lies strictly inside (-MOVE_THRESHOLD, MOVE_THRESHOLD) are
    dropped.
    """

    # Number of consecutive ticks flattened into one X row.
    WINDOW_TICKS = 10
    # Look-ahead distance in rows used for the label (the notes accompanying
    # this code call it "3 minutes later").
    HORIZON_TICKS = 360
    # Divisor applied to the raw price difference before thresholding.
    # A float keeps the division exact under both Python 2 and Python 3.
    PRICE_STEP = 5.0
    # Minimum absolute scaled move for a sample to be kept.
    MOVE_THRESHOLD = 10

    srcpath = None
    resultpath = './temp/'
    tikcount = 0
    curFile = None
    Xdata = None
    ydata = None

    def __init__(self):
        # Per-instance containers: class-level lists would be shared by every
        # instance of the class.
        self.initData()

    def _resetFileState(self):
        """Clear the state that belongs to a single input file."""
        self.tikcount = 0
        self.tiklist = []
        self.yDimList = []
        self.Xdata = None
        self.ydata = None
        self.curFile = None

    def initData(self):
        """Reset all accumulated state, including the list of written files."""
        self._resetFileState()
        self.Xy_file = []

    def satype(self, lasttik, curtik):
        """Fill the derived fields (indices 8-11) of ``curtik`` in place.

        Args:
            lasttik: the previous tick; only its bid (4) and ask (6) are read.
            curtik: the current tick; fields 8, 9, 10 and 11 are overwritten.

        Returns:
            The same ``curtik`` list that was passed in.

        Field 10 (type1) becomes 1 when the price is at or above the previous
        ask and -1 when it is at or below the previous bid (the bid check runs
        last, so it wins if both hold); otherwise it is left untouched.
        Fields 8 / 9 are (vol + amount) / 2 and (vol - amount) / 2.
        NOTE(review): field 3 is named ``amount`` in the tick layout but the
        formula treats it like a position change - confirm against the CSV.

        Field 11 (type2) codes, as labelled in the original comments:
            1 both-close, 2 both-open, 3 long-open, 4 short-close,
            5 long-exchange, 6 short-open, 7 long-close, 8 short-exchange.
        The checks run in that order and later matches overwrite earlier ones,
        so codes 1 and 2 only survive when type1 is neither 1 nor -1.
        """
        price = curtik[1]
        if price >= lasttik[6]:
            curtik[10] = 1
        if price <= lasttik[4]:
            curtik[10] = -1

        # Explicit float division so the result does not depend on the
        # interpreter's integer-division rules.
        curtik[8] = (curtik[2] + curtik[3]) / 2.0
        curtik[9] = (curtik[2] - curtik[3]) / 2.0

        openvol = curtik[8]
        closevol = curtik[9]
        side = curtik[10]

        if openvol == 0:  # both-close
            curtik[11] = 1
        if closevol == 0:  # both-open
            curtik[11] = 2

        if side == 1:
            if openvol > closevol:  # long-open
                curtik[11] = 3
            elif openvol < closevol:  # short-close
                curtik[11] = 4
            elif openvol == closevol:  # long-exchange
                curtik[11] = 5
        elif side == -1:
            if openvol > closevol:  # short-open
                curtik[11] = 6
            elif openvol < closevol:  # long-close
                curtik[11] = 7
            elif openvol == closevol:  # short-exchange
                curtik[11] = 8

        return curtik

    def dotik(self, tik):
        """Placeholder hook for per-tick processing; does nothing."""
        pass

    def dotiklist(self):
        """Build ``self.Xdata`` / ``self.ydata`` from the collected ticks.

        Reads ``self.tiklist`` and ``self.yDimList`` (same length, same order).
        Rows are collected in lists and stacked once, so the result is always
        2-D - (n_samples, WINDOW_TICKS * 12) for X and (n_samples, 1) for y -
        even when only one sample qualifies.  If ``Xdata`` already holds
        samples, the new ones are appended below them.  When nothing
        qualifies and nothing was stored before, both stay ``None``.
        """
        data = np.array(self.tiklist)
        print(data.shape)
        print('for tiklist: ')

        window = self.WINDOW_TICKS
        rows = []
        labels = []
        for i in range(window, data.shape[0]):
            y = self.yDimList[i]
            if -self.MOVE_THRESHOLD < y < self.MOVE_THRESHOLD:
                continue
            # Keep only the direction of the move: +1 or -1.
            y = y / abs(y)

            x_data_rc = np.array(self.tiklist[i - window:i])
            lastprice = self.tiklist[i][1]
            # Subtract the current price from the price, bid and ask columns
            # so the window is expressed relative to tick i.
            # NOTE(review): dtype=int truncates a fractional price; kept as in
            # the original - confirm that prices are always integral.
            opNumber = np.array(
                [0, lastprice, 0, 0, lastprice, 0, lastprice, 0, 0, 0, 0, 0],
                dtype=int)
            rows.append((x_data_rc - opNumber).reshape(-1))
            labels.append(np.array([y]))

        if rows:
            if self.Xdata is not None:
                rows.insert(0, self.Xdata)
                labels.insert(0, self.ydata)
            # One stack at the end instead of re-copying the arrays per sample.
            self.Xdata = np.vstack(rows)
            self.ydata = np.vstack(labels)

        if self.Xdata is None:
            return
        print(self.Xdata.shape)
        print(self.ydata.shape)
        print(self.Xdata[0:10])
        print(self.ydata[0:20])

    def _maketik(self, data, i):
        """Return the 12-field tick list for row ``i`` of the raw CSV array.

        NOTE(review): columns 3, 5, 7, 8, 9, 10, 11 are assumed to hold price,
        vol, amount, bid, bid vol, ask, ask vol - verify against the CSV header.
        """
        return [i, data[i, 3], data[i, 5], data[i, 7], data[i, 8],
                data[i, 9], data[i, 10], data[i, 11], 0, 0, 0, 0]

    def dofile(self, file):
        """Convert one tick CSV into an .h5 file holding 'data' and 'label'.

        Per-file state is reset first; the list of previously written files
        (``self.Xy_file``) is kept so a directory run can report all of them.
        A file that cannot be read is reported and skipped.  Nothing is written
        when the file yields no qualifying samples.
        """
        self._resetFileState()
        try:
            df = pd.read_csv(file, header=0, encoding='gbk')
        except Exception as e:
            # Best effort: one unreadable file must not abort a directory run.
            print('skip unreadable file %s: %s' % (file, e))
            return

        self.curFile = file
        self.tiklist_df = df
        data = np.array(df)
        print(data.shape)
        print('for file: ')

        horizon = self.HORIZON_TICKS
        for i in range(1, data.shape[0] - horizon):
            lasttik = self._maketik(data, i - 1)
            curtik = self.satype(lasttik, self._maketik(data, i))
            self.tiklist.append(curtik)
            self.yDimList.append(
                (data[i + horizon, 3] - data[i, 3]) / self.PRICE_STEP)

        self.dotiklist()
        if self.Xdata is None:
            return

        # Save to file.
        fileflag = os.path.splitext(os.path.basename(file))[0]
        destFile = os.path.join(self.resultpath,
                                'tik10x3min_X_' + fileflag + '.h5')
        print(destFile)
        # `key` is passed by keyword; newer pandas releases deprecate passing
        # it positionally.
        pd.DataFrame(self.Xdata).to_hdf(destFile, key='data')
        pd.DataFrame(self.ydata).to_hdf(destFile, key='label')
        self.Xy_file.append(destFile)

    def setPath(self, srcpath, resultpath):
        """Remember the source and result directories."""
        self.srcpath = srcpath
        self.resultpath = resultpath

    def dopath(self, srcpath, resultpath):
        """Process every regular file in ``srcpath`` and index the outputs.

        After all files are processed, the paths of the written .h5 files are
        saved as ``data.txt`` (CSV format) inside ``resultpath``.
        """
        self.setPath(srcpath, resultpath)
        self.Xy_file = []

        names = os.listdir(srcpath)  # every file and directory in the folder
        total = len(names)
        for i, name in enumerate(names):
            print('%d / %d' % (i, total))
            path = os.path.join(srcpath, name)
            if not os.path.isfile(path):
                continue
            print(path)
            self.dofile(path)

        xy_df = pd.DataFrame(self.Xy_file)
        xy_df.to_csv(os.path.join(resultpath, 'data.txt'))

def test():
    """Print a 3x3 array before and after subtracting a row vector.

    Small demonstration that NumPy broadcasts the 1-D array across every row,
    which is the same operation used to re-base tick prices.
    """
    xdata = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
    print(xdata)
    a2 = np.array([1, 1, 3], dtype=int)
    xdata = xdata - a2
    print(xdata)


if __name__ == "__main__":
    # Run the small NumPy demo, then process every file in the source
    # directory and write the results to the output directory.
    test()
    model = tik10x3min()
    model.dopath('/home/hylas/dev/data2/futuretik/ru/ru2015/',
                 '/home/hylas/dev/data2/futuretik/ru/tik10x3min/')

 



代碼(網絡訓練):

# encoding: UTF-8

# NOTE(review): fetch_mldata is not used in this script and is no longer
# available in recent scikit-learn releases - confirm before upgrading.
from sklearn.datasets import fetch_mldata
import pandas as pd
import numpy as np
import time
import sys

# The project-local packages imported below (tool.*, ML.*) are looked up in
# this directory.
sys.path.append("/home/hylas/dev/py/project/lib/hyNN/")

import tool
from tool.dataxdo import *
from tool.dfdo import *
from tool.datadraw import *
from tool.imgdo import *

from ML.CNN import *
from ML.MLP import *
from ML.LSTMer import *

# These imports intentionally stay after the star imports above so that they
# keep precedence over any same-named symbol those modules export.
import os
import sys
from sklearn import datasets
from keras.utils import np_utils

# Read the sample files to build X, y,
# then feed X, y to the MLP / LSTM for recognition.


class runTik10x3miny():
    """Load the generated tick samples and run them through a network model.

    ``X`` / ``y`` accumulate the samples of every .h5 file loaded so far.
    """

    X = None
    y = None

    # Default directory holding the per-file .h5 sample files.
    DATA_PATH = '/home/hylas/dev/data2/futuretik/ru/tik10x3min/'
    # Default cache file for the merged, training-ready data set.
    CACHE_FILE = '/home/hylas/dev/data2/futuretik/ru/modeh5data/tik10x3miny.h5'

    def __init__(self):
        print('runTik10x3miny init')

    def dofile(self, path):
        """Append the samples stored in one .h5 file to ``self.X`` / ``self.y``.

        Labels are cast to int and remapped -1 -> 0, 1 -> 1.  A file whose
        feature width differs from the data already loaded is reported and
        skipped.
        """
        df_X = pd.read_hdf(path, key='data')
        df_y = pd.read_hdf(path, key='label')
        df_y[[0]] = df_y[[0]].astype(int)
        df_y[0] = df_y[0].map({-1: 0, 1: 1})

        npX = np.array(df_X)
        npY = np.array(df_y)
        if self.X is None:
            self.X = npX
            self.y = npY
            return

        if self.X.shape[1] != npX.shape[1]:
            print(' self.X.shape[1] != npX.shape[1] ')
            print('%s %s' % (self.X.shape[1], npX.shape[1]))
            return
        self.X = np.vstack((self.X, npX))
        self.y = np.vstack((self.y, npY))

    def loadXyFromDiskPath(self, datapath):
        """Load every ``.h5`` file directly inside ``datapath``.

        Directories and files with another extension are skipped.

        Returns:
            ``(self.X, self.y)``; both are ``None`` when nothing was loaded.
        """
        names = os.listdir(datapath)  # every file and directory in the folder
        total = len(names)
        for i, name in enumerate(names):
            print('%d / %d' % (i, total))
            path = os.path.join(datapath, name)
            if not os.path.isfile(path):
                continue
            if not path.endswith('.h5'):
                continue
            print(path)
            self.dofile(path)
        return self.X, self.y

    def makedata(self, destFile=None, datapath=None):
        """Return the training arrays, building and caching them if needed.

        Args:
            destFile: cache file path; defaults to ``CACHE_FILE``.
            datapath: directory of per-file samples; defaults to ``DATA_PATH``.

        Returns:
            ``(X, y)`` as NumPy arrays.  When the cache file exists it is read
            back as is; otherwise the samples are loaded from ``datapath``, the
            labels are one-hot encoded into 2 classes, the data is passed
            through ``dfdo.datadengfen`` and the result is written to the cache.
            NOTE(review): ``datadengfen`` is a project helper not visible here -
            presumably it balances or splits the data; confirm.

        Raises:
            ValueError: if ``datapath`` yields no samples.
        """
        if destFile is None:
            destFile = self.CACHE_FILE
        if datapath is None:
            datapath = self.DATA_PATH

        if os.path.exists(destFile):
            X = np.array(pd.read_hdf(destFile, key='data'))
            y = np.array(pd.read_hdf(destFile, key='label'))
            return X, y

        dd = dfdo()
        X, y = self.loadXyFromDiskPath(datapath)
        if X is None:
            # Fail with a clear message instead of a TypeError on None below.
            raise ValueError('no .h5 sample files found in %s' % datapath)
        print(y[0:100])
        y = np_utils.to_categorical(y, num_classes=2)
        X, y = dd.datadengfen(X, y)
        # `key` is passed by keyword; newer pandas releases deprecate passing
        # it positionally.
        pd.DataFrame(X).to_hdf(destFile, key='data')
        pd.DataFrame(y).to_hdf(destFile, key='label')
        return X, y

    def _loadAndPreview(self):
        """Fetch the training arrays and print their shapes and first rows."""
        X, y = self.makedata()
        print(X.shape)
        print(y.shape)
        print(X[0:10])
        print(y[0:10])
        return X, y

    def do(self):
        """Run the data through the project's ``MLP`` model."""
        model = MLP()
        X, y = self._loadAndPreview()
        model.simple_result(X, y)

    def do2(self):
        """Run the data through the project's ``LSTMer`` model.

        The data is described to the model as 10 timesteps of 12 values.
        """
        model = LSTMer()
        X, y = self._loadAndPreview()
        model.simple_result(X, y, timesteps=10, data_dim=12)

if __name__ == "__main__":
    # Train and evaluate with the MLP variant.
    run = runTik10x3miny()
    run.do()

 



訓練結果:

1263074/1263074 [==============================] - 22s - loss: 0.6923 - acc: 0.5092 - val_loss: 0.6924 - val_acc: 0.5133
Epoch 67/68
1263074/1263074 [==============================] - 22s - loss: 0.6924 - acc: 0.5097 - val_loss: 0.6924 - val_acc: 0.5117
Epoch 68/68
1263074/1263074 [==============================] - 22s - loss: 0.6923 - acc: 0.5093 - val_loss: 0.6921 - val_acc: 0.5102
evaluate acc:
150144/155936 [===========================>..] - ETA: 0s
[0.6920651392649998, 0.50743253642520003]

 


歡迎討論(QQ羣):375129936

相關文章
相關標籤/搜索