對測試集進行測試,只提供了思路,程序是不能用的。

 1 from sklearn.externals import joblib  2 import pandas as pd  3 import numpy  4 from sklearn.preprocessing import OneHotEncoder  5 #import link_and_train
 6 #拼接測試集,測試集進行one-hot編碼
 7 onehot = OneHotEncoder()  8 addata = pd.read_csv("adFeature.csv")  9 testdata = pd.read_csv("test1.csv") 10 userdata = pd.read_csv("userFeature.data") 11 data = pd.merge(testdata,userdata) 12 data = pd.merge(data,addata) 13 
14 data.to_csv("predict_data.csv",index=False) 15 
# Columns of the merged table that go into the one-hot encoder, in this order.
userfeature = [
    "age", "carrier", "consumptionAbility", "ct", "education", "gender", "house", "interest1",
    "interest2", "interest3", "interest4", "interest5", "kw1", "kw2", "kw3", "marriageStatus", "os",
    "topic1", "topic2", "topic3", "LBS", "appIdAction", "appIdInstall", "campaignId", "creativeId",
    "creativeSize", "adCategoryId", "advertiserId", "productId", "productType",
]


def _encode_feature_value(value):
    """Collapse one raw cell of the merged table into a single integer.

    * integer cell -> the integer itself
    * float cell   -> 0
    * anything else is treated as a whitespace-separated list of integers
      and replaced by their sum
    """
    if isinstance(value, (int, numpy.integer)):
        return int(value)
    if isinstance(value, (float, numpy.floating)):
        # Missing values are filled with 0; the original author already
        # flagged this as unreasonable and in need of improvement.
        # NOTE(review): this zeroes every float cell, not only NaN.  Kept
        # as-is because the saved models were presumably trained on the same
        # encoding -- confirm before changing.
        return 0
    # The sum does not depend on order, so the ids are not sorted first.
    return sum(int(token) for token in value.split())


# Pull each column out once instead of doing a chained lookup per cell.
feature_columns = [data[feature].values for feature in userfeature]
userdata = numpy.array(
    [[_encode_feature_value(value) for value in row] for row in zip(*feature_columns)]
)

# NOTE(review): the encoder is fitted on the test rows themselves, so the
# resulting columns need not match what the saved models were trained on --
# confirm against the training script.
onehot.fit(userdata)
print("--------------------------------------------------------------------")
print("--------------------------------------------------------------------")
test = onehot.transform(userdata)

print(test)
print(numpy.shape(test))
# Majority vote over the saved models: every model labels every encoded test
# row with -1 or 1, and the winning side's share of the votes is printed.
print("開始預測。。。")

# The models are stored on disk as "1.model" ... "154.model".
model_ids = range(1, 155)
model_count = len(model_ids)

# Cover every encoded row instead of a hard-coded 19000.
sample_count = test.shape[0]
negative_votes = numpy.zeros(sample_count, dtype=int)
positive_votes = numpy.zeros(sample_count, dtype=int)

# Load each model exactly once and let it label all rows in one call.  The
# tallies live outside this loop so that every model's vote is counted, not
# just the last one's.
for model_id in model_ids:
    model = joblib.load("%d.model" % model_id)
    predictions = numpy.asarray(model.predict(test)).ravel()
    negative_votes += predictions == -1
    positive_votes += predictions == 1

uids = data["uid"].values
aids = data["aid"].values
for one in range(sample_count):
    s1 = int(negative_votes[one])
    s2 = int(positive_votes[one])
    # The share is taken over the number of models actually loaded (the
    # original divided by 114, which does not match the 154 models).
    # NOTE(review): the printed line does not say which label won.
    if s1 > s2:
        print(one, "uid:", uids[one], "aid:", aids[one], "result:", s1 / model_count)
    elif s2 > s1:
        print(one, "uid:", uids[one], "aid:", aids[one], "result:", s2 / model_count)
    else:
        # A tie; with an even number of models this can actually happen.
        print("impossible")
相關文章
相關標籤/搜索