1. 安装 xgboost：pip3 install xgboost
2.xgboost demo
数据文件 agaricus.txt.train 和 agaricus.txt.test 来自蘑菇数据集，这是一个二分类问题。
数据集的详细说明见：https://archive.ics.uci.edu/ml/datasets/Mushroom
每一行数据有 22 个属性。
# coding:utf-8
# XGBoost demo: binary classification on the agaricus (mushroom) data.
# Trains a small boosted-tree model, prints the elapsed training time, and
# reports accuracy on both the training and the test split.
import time

import xgboost as xgb
# accuracy_score computes the classification accuracy.
from sklearn.metrics import accuracy_score

# Read in data. The files ship in the demo directory of the XGBoost
# installation; they have been copied into the data directory next to this
# code.
my_workpath = '../data/'
# Recent XGBoost releases require the text format to be named explicitly in
# the URI, hence the "?format=libsvm" suffix.
dtrain = xgb.DMatrix(my_workpath + 'agaricus.txt.train?format=libsvm')
dtest = xgb.DMatrix(my_workpath + 'agaricus.txt.test?format=libsvm')

# Specify parameters via map.
# NOTE(review): newer XGBoost releases replace `silent` with `verbosity`;
# confirm against the installed version before switching.
param = {'max_depth': 2, 'eta': 1, 'silent': 0, 'objective': 'binary:logistic'}
print(param)

# Number of boosting iterations.
num_round = 2

# time.clock() was removed in Python 3.8; perf_counter() is the supported
# high-resolution timer for measuring elapsed time.
starttime = time.perf_counter()
bst = xgb.train(param, dtrain, num_round)  # dtrain is the training set
endtime = time.perf_counter()
print(endtime - starttime)

# Evaluate on the training set: predict() returns one value per row, which
# is rounded to a 0/1 label before scoring.
train_preds = bst.predict(dtrain)
train_predictions = [round(value) for value in train_preds]
print("train_predictions", train_predictions)

y_train = dtrain.get_label()
print("y_train", y_train)

train_accuracy = accuracy_score(y_train, train_predictions)
print("Train Accuary: %.2f%%" % (train_accuracy * 100.0))

# Make predictions on the held-out test set and score them the same way.
preds = bst.predict(dtest)
predictions = [round(value) for value in preds]

y_test = dtest.get_label()
test_accuracy = accuracy_score(y_test, predictions)
print("Test Accuracy: %.2f%%" % (test_accuracy * 100.0))