"""Join the test set with user/ad features, one-hot encode it, and print the
majority vote of a saved model ensemble for every row.

Inputs (read from the working directory): ``adFeature.csv``, ``test1.csv``,
``userFeature.data`` and the pickled models ``1.model`` .. ``154.model``.
Output: ``predict_data.csv`` (the merged table) plus one printed line per row.
"""

import numpy
import pandas as pd

# Columns fed to the encoder, in the order the encoder sees them.
FEATURE_COLUMNS = [
    "age", "carrier", "consumptionAbility", "ct", "education", "gender",
    "house", "interest1", "interest2", "interest3", "interest4", "interest5",
    "kw1", "kw2", "kw3", "marriageStatus", "os", "topic1", "topic2", "topic3",
    "LBS", "appIdAction", "appIdInstall", "campaignId", "creativeId",
    "creativeSize", "adCategoryId", "advertiserId", "productId", "productType",
]

# Model files are named "<id>.model" for every id in this range.
MODEL_IDS = range(1, 155)

# Upper bound on the number of rows that are scored.
ROW_LIMIT = 19000


def encode_feature_value(value):
    """Collapse one raw cell into a single integer.

    * floats (including NaN) become 0,
    * integers are returned as plain ``int``,
    * anything else is treated as a whitespace-separated list of integers
      and replaced by their sum.
    """
    if isinstance(value, (float, numpy.floating)):
        # Missing values are filled with 0 (crude; left for later improvement).
        # NOTE(review): every float is zeroed, not only NaN. Kept as-is because
        # the encoding presumably has to match what the models were trained
        # on -- confirm against the training script before changing it.
        return 0
    if isinstance(value, (int, numpy.integer)):
        return int(value)
    return sum(int(token) for token in value.split())


def build_feature_matrix(data, feature_columns=FEATURE_COLUMNS):
    """Return an array of shape (rows, features) of encoded cell values.

    ``data`` is anything indexable by column name that yields the column's
    values when iterated (e.g. a pandas DataFrame).
    """
    encoded_columns = [
        [encode_feature_value(value) for value in data[name]]
        for name in feature_columns
    ]
    # One list per column was built above; transpose to one row per sample.
    return numpy.array(encoded_columns).T


def tally_votes(per_model_predictions, n_rows):
    """Count -1 and +1 votes per row over all models.

    ``per_model_predictions`` is an iterable with one sequence of ``n_rows``
    predictions per model.  It is consumed lazily, so a generator that loads
    one model at a time keeps only that model in memory.

    Returns ``(negative_votes, positive_votes, n_models)``.
    """
    negative_votes = numpy.zeros(n_rows, dtype=int)
    positive_votes = numpy.zeros(n_rows, dtype=int)
    n_models = 0
    for predicted in per_model_predictions:
        predicted = numpy.asarray(predicted).ravel()
        negative_votes += predicted == -1
        positive_votes += predicted == 1
        n_models += 1
    return negative_votes, positive_votes, n_models


def winning_share(negative, positive, n_models):
    """Return the winning side's fraction of all votes, or None on a tie."""
    if negative == positive:
        return None
    return int(max(negative, positive)) / n_models


def main():
    """Run the merge -> encode -> ensemble-predict pipeline."""
    # Imported here so the helpers above stay importable without scikit-learn.
    from sklearn.preprocessing import OneHotEncoder
    try:
        import joblib
    except ImportError:
        # Old scikit-learn releases only shipped joblib under this path.
        from sklearn.externals import joblib

    # Assemble the test set: join test rows with user and ad features.
    ad_data = pd.read_csv("adFeature.csv")
    test_data = pd.read_csv("test1.csv")
    user_data = pd.read_csv("userFeature.data")
    data = pd.merge(pd.merge(test_data, user_data), ad_data)

    data.to_csv("predict_data.csv", index=False)

    features = build_feature_matrix(data)

    # NOTE(review): the encoder is fitted on the test data itself, so its
    # output columns may not line up with the ones the saved models were
    # trained on -- verify, or load the encoder fitted at training time.
    onehot = OneHotEncoder()
    onehot.fit(features)
    print("--------------------------------------------------------------------")
    print("--------------------------------------------------------------------")
    test = onehot.transform(features)

    print(test)
    print(numpy.shape(test))

    print("開始預測。。。")
    n_rows = min(ROW_LIMIT, test.shape[0])
    rows = test[:n_rows]
    # SECURITY: joblib.load unpickles arbitrary objects; only load model
    # files that come from a trusted source.
    predictions = (
        joblib.load("%d.model" % model_id).predict(rows)
        for model_id in MODEL_IDS
    )
    negative_votes, positive_votes, n_models = tally_votes(predictions, n_rows)

    for row in range(n_rows):
        share = winning_share(negative_votes[row], positive_votes[row], n_models)
        if share is None:
            # Tie (or no recognised votes at all).
            print("impossible")
        else:
            print(row, "uid:", data["uid"][row], "aid:", data["aid"][row],
                  "result:", share)


if __name__ == "__main__":
    main()