Note
Go to the end to download the full example code.
Demo for using cross validation
"""
Demo for using cross validation
===============================
"""

import os
from typing import Any, Dict, Tuple

import numpy as np

import xgboost as xgb

# load data in do training
CURRENT_DIR = os.path.dirname(__file__)
dtrain = xgb.DMatrix(
    os.path.join(CURRENT_DIR, "../data/agaricus.txt.train?format=libsvm")
)
param = {"max_depth": 2, "eta": 1, "objective": "binary:logistic"}
num_round = 2

print("running cross validation")
# do cross validation, this will print result out as
# [iteration]  metric_name:mean_value+std_value
# std_value is standard deviation of the metric
xgb.cv(
    param,
    dtrain,
    num_round,
    nfold=5,
    metrics={"error"},
    seed=0,
    callbacks=[xgb.callback.EvaluationMonitor(show_stdv=True)],
)

print("running cross validation, disable standard deviation display")
# do cross validation, this will print result out as
# [iteration]  metric_name:mean_value
res = xgb.cv(
    param,
    dtrain,
    num_boost_round=10,
    nfold=5,
    metrics={"error"},
    seed=0,
    callbacks=[
        xgb.callback.EvaluationMonitor(show_stdv=False),
        xgb.callback.EarlyStopping(3),
    ],
)
print(res)

print("running cross validation, with preprocessing function")


# define the preprocessing function
# used to return the preprocessed training, test data, and parameter
# we can use this to do weight rescale, etc.
# as a example, we try to set scale_pos_weight
def fpreproc(
    dtrain: xgb.DMatrix, dtest: xgb.DMatrix, param: Any
) -> Tuple[xgb.DMatrix, xgb.DMatrix, Dict[str, Any]]:
    """Per-fold preprocessing hook for ``xgb.cv``.

    Sets ``scale_pos_weight`` from the negative/positive label ratio of the
    fold's training split, then returns the (possibly modified) data and
    parameters that the fold will actually be trained with.
    """
    label = dtrain.get_label()
    # ratio of negative to positive examples in this fold's training data
    ratio = float(np.sum(label == 0)) / np.sum(label == 1)
    param["scale_pos_weight"] = ratio
    return (dtrain, dtest, param)


# do cross validation, for each fold
# the dtrain, dtest, param will be passed into fpreproc
# then the return value of fpreproc will be used to generate
# results of that fold
xgb.cv(param, dtrain, num_round, nfold=5, metrics={"auc"}, seed=0, fpreproc=fpreproc)

###
# you can also do cross validation with customized loss function
# See custom_objective.py
##
print("running cross validation, with customized loss function")


def logregobj(
    preds: np.ndarray, dtrain: xgb.DMatrix
) -> Tuple[np.ndarray, np.ndarray]:
    """Custom binary-logistic objective.

    ``preds`` are raw margins when a custom objective is used; return the
    gradient and hessian of the logistic loss with respect to those margins.
    """
    labels = dtrain.get_label()
    preds = 1.0 / (1.0 + np.exp(-preds))
    grad = preds - labels
    hess = preds * (1.0 - preds)
    return grad, hess


def evalerror(preds: np.ndarray, dtrain: xgb.DMatrix) -> Tuple[str, float]:
    """Custom error metric: fraction of misclassified examples.

    ``preds`` are raw margins (a custom objective is in use), so apply the
    sigmoid before thresholding.  The metric name must not contain a colon
    or a space.
    """
    labels = dtrain.get_label()
    preds = 1.0 / (1.0 + np.exp(-preds))
    # Bug fix: after the sigmoid the predictions lie in (0, 1), so the
    # decision threshold is 0.5 (equivalent to margin > 0).  The previous
    # comparison against 0.0 classified every example as positive, making
    # the reported "error" just the fraction of negative labels.
    return "error", float(sum(labels != (preds > 0.5))) / len(labels)


param = {"max_depth": 2, "eta": 1}

# train with customized objective
xgb.cv(
    param, dtrain, num_round, nfold=5, seed=0, obj=logregobj, custom_metric=evalerror
)