根據心情補充，語言都是 Python。
from sklearn.preprocessing import LabelEncoder

# Integer-encode every object-dtype column of `train`, applying the same
# mapping to the matching column of `test`.
# The encoder is fitted on the train and test values together so that a
# category present in only one of the two frames still receives a code.
# NOTE(review): assumes every object-dtype column of `train` also exists in
# `test`; otherwise `test[c]` raises KeyError -- confirm against the data.
for c in train.columns:
    if train[c].dtype == 'object':
        lbl = LabelEncoder()
        lbl.fit(list(train[c].values) + list(test[c].values))
        train[c] = lbl.transform(list(train[c].values))
        test[c] = lbl.transform(list(test[c].values))
'''Train the xgb model, then predict the test data'''
# Booster configuration, DMatrix construction, training and test prediction.
# NOTE(review): 'n_trees' does not appear among XGBoost's documented booster
# parameters; the number of boosting rounds is set by `num_boost_round`
# below -- confirm and drop the key if it is indeed ignored.
# NOTE(review): recent XGBoost releases document 'reg:squarederror' as the
# replacement for 'reg:linear' and 'verbosity' as the replacement for
# 'silent' -- confirm against the installed version before changing them.
xgb_params = {
    'n_trees': 520,
    'eta': 0.0045,
    'max_depth': 4,
    'subsample': 0.93,
    'objective': 'reg:linear',
    'eval_metric': 'rmse',
    'base_score': y_mean,  # base prediction = mean(target)
    'silent': 1,
}

# NOTE: Make sure that the class is labeled 'class' in the data file
# (note kept from the original; the code below drops column 'y' from the
# features and takes the labels from `y_train`).
dtrain = xgb.DMatrix(train.drop('y', axis=1), y_train)
dtest = xgb.DMatrix(test)

num_boost_rounds = 1250

# Train the model. `dict(xgb_params, silent=0)` builds a copy of the
# parameters with 'silent' overridden to 0 for this call only; `xgb_params`
# itself is left unchanged.
model = xgb.train(dict(xgb_params, silent=0), dtrain, num_boost_round=num_boost_rounds)

# Predict on the test matrix.
y_pred = model.predict(dtest)
# One-hot encode the columns listed in `categorical`.
# The encoder is fitted on the categorical columns of `X` and `X_test`
# stacked together, then applied to each frame separately so both share the
# same output columns.
# handle_unknown='ignore' makes transform() tolerate categories that were not
# seen during fit instead of raising an error.
enc = OneHotEncoder(handle_unknown='ignore')
enc = enc.fit(pd.concat([X[categorical], X_test[categorical]]))

# NOTE(review): the variable names suggest sparse-matrix output (the
# encoder's default) -- confirm for the installed scikit-learn version.
X_cat_sparse = enc.transform(X[categorical])
X_test_cat_sparse = enc.transform(X_test[categorical])