####################################
# XGBoost Classification
####################################
#
# End-to-end example script: load the bundled classification data set,
# run the feature-engineering steps, partition the data, then train,
# score and evaluate a single XGBoost classifier through RetroFit.
# The script runs top to bottom; every step rebinds ``data`` so the
# order of the statements matters.

# Setup Environment
import pkg_resources
import timeit
import datatable as dt
import retrofit
from retrofit import DatatableFE as dtfe
from retrofit import MachineLearning as ml

# Load some data
FilePath = pkg_resources.resource_filename(
    'retrofit', 'datasets/ClassificationData.csv'
)
data = dt.fread(FilePath)

# Instantiate Feature Engineering Class
FE = dtfe.FE()

# Create some lags
data = FE.FE0_AutoLags(
    data,
    LagColumnNames=['Independent_Variable1', 'Independent_Variable2'],
    DateColumnName='DateTime',
    ByVariables='Factor_1',
    LagPeriods=[1, 2],
    ImputeValue=-1,
    Sort=True,
    use_saved_args=False,
)

# Create some rolling stats
data = FE.FE0_AutoRollStats(
    data,
    RollColumnNames=['Independent_Variable1', 'Independent_Variable2'],
    DateColumnName='DateTime',
    ByVariables='Factor_1',
    MovingAvg_Periods=[1, 2],
    MovingSD_Periods=[2, 3],
    MovingMin_Periods=[1, 2],
    MovingMax_Periods=[1, 2],
    ImputeValue=-1,
    Sort=True,
    use_saved_args=False,
)

# Create some diffs
data = FE.FE0_AutoDiff(
    data,
    DateColumnName='DateTime',
    ByVariables=['Factor_1', 'Factor_2', 'Factor_3'],
    DiffNumericVariables='Independent_Variable1',
    DiffDateVariables=None,
    DiffGroupVariables=None,
    NLag1=0,
    NLag2=1,
    Sort=True,
    use_saved_args=False,
)

# Dummify
data = FE.FE1_DummyVariables(
    data=data,
    CategoricalColumnNames=['Factor_1', 'Factor_2', 'Factor_3'],
    use_saved_args=False,
)
# One boolean per column name, used as the column selector: True for every
# column that is not one of the original factor columns.
data = data[
    :,
    [name not in ['Factor_1', 'Factor_2', 'Factor_3'] for name in data.names],
]

# Create Calendar Vars
data = FE.FE1_AutoCalendarVariables(
    data,
    DateColumnNames='DateTime',
    CalendarVariables=['wday', 'month', 'quarter'],
    use_saved_args=False,
)

# Type conversions for modeling
data = FE.FE2_ColTypeConversions(
    data,
    Int2Float=True,
    Bool2Float=True,
    RemoveDateCols=True,
    RemoveStrCols=False,
    SkipCols=None,
    use_saved_args=False,
)

# Drop Text Cols (no word2vec yet)
data = data[:, [z for z in data.names if z not in ['Comment']]]

# Create partitioned data sets
DataFrames = FE.FE2_AutoDataPartition(
    data,
    DateColumnName=None,
    PartitionType='random',
    Ratios=[0.7, 0.2, 0.1],
    ByVariables=None,
    Sort=False,
    use_saved_args=False,
)

# Features: every remaining column except the target ('Adrian') and the
# other names listed here
Features = [
    z for z in data.names
    if z not in ['Adrian', 'DateTime', 'Comment', 'Weights']
]

# Prepare modeling data sets
ModelData = ml.ML0_GetModelData(
    Processing='xgboost',
    TrainData=DataFrames['TrainData'],
    ValidationData=DataFrames['ValidationData'],
    TestData=DataFrames['TestData'],
    ArgsList=None,
    TargetColumnName='Adrian',
    NumericColumnNames=Features,
    CategoricalColumnNames=None,
    TextColumnNames=None,
    WeightColumnName=None,
    Threads=-1,
    InputFrame='datatable',
)

# Get args list for algorithm and target type
ModelArgs = ml.ML0_Parameters(
    Algorithms='XGBoost',
    TargetType='Classification',
    TrainMethod='Train',
)

# Update iterations to run quickly
ModelArgs.get('XGBoost').get('AlgoArgs')['num_boost_round'] = 50

# Initialize RetroFit
x = ml.RetroFit(ModelArgs, ModelData, DataFrames)

# Train Model
x.ML1_Single_Train(Algorithm='XGBoost')

# Score data
x.ML1_Single_Score(
    DataName=x.DataSetsNames[2],
    ModelName=x.ModelListNames[0],
    Algorithm='XGBoost',
    NewData=None,
)

# Evaluate scored data
metrics = x.ML1_Single_Evaluate(
    FitName=x.FitListNames[0],
    TargetType=x.ModelArgs.get('XGBoost')['TargetType'],
    ScoredDataName=x.DataSetsNames[-1],
    ByVariables=None,
    CostDict=dict(tpcost=0, fpcost=1, fncost=1, tncost=0),
)

# NOTE: the bare expressions below only display their values in an
# interactive session (REPL / notebook); run as a plain script they are
# evaluated and discarded.

# Metrics
metrics.keys()

# Scoring data names
x.DataSetsNames

# Scoring data
x.DataSets.get('Scored_test_data_XGBoost_1')

# Check ModelArgs Dict
x.PrintAlgoArgs(Algo='XGBoost')

# List of model names
x.ModelListNames

# List of model fitted names
x.FitListNames