
ODRF implements the well-known Oblique Decision Tree (ODT) and ODT-based Random Forest (ODRF), which use linear combinations of predictors as partitioning variables for both traditional CART and Random Forest. A number of modifications have been adopted in the implementation, and some new functions are also provided.
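To make the idea concrete, the toy sketch below contrasts a CART-style axis-aligned split with an ODT-style oblique split. It is purely illustrative and does not use ODRF internals: the projection coefficients `w` and both thresholds are made up here, whereas ODRF learns them at each node.

```r
# Illustration only: axis-aligned vs. oblique splits on the iris predictors.
x <- as.matrix(iris[, 1:4])

# CART-style split: one predictor against a threshold.
axis_split <- x[, "Petal.Length"] < 2.5

# ODT-style split: a linear combination w'x against a threshold.
w <- c(0.5, -0.2, 0.7, 0.1)         # hypothetical projection coefficients
oblique_split <- drop(x %*% w) < 2  # drop() turns the n x 1 matrix into a vector

table(axis_split, oblique_split)
```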
The ODRF package consists of the following main functions:
- `ODT()` classification and regression using an ODT in which each node is split by a linear combination of predictors.
- `ODRF()` classification and regression implemented by ODRF, an extension of random forest based on `ODT()` that includes random forest as a special case (see the sketch after this list).
- `ODBT()` applies feature bagging during the training process of ODT-based boosting trees to ensemble multiple boosting trees.
- `online()` online training to update existing ODT and ODRF with new data sets.
- `prune()` prunes an ODT from bottom to top with validation data, based on prediction error.
- `print()`, `predict()` and `plot()` methods for the classes ODT and ODRF, extending the corresponding base R functions.

ODRF allows users to define their own functions to find the projections at each node, which is essential to the performance of the forests. We also provide a complete comparison and analysis of other ODT and ODRF implementations; more details are available in `vignette("ODRF")`.
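To make the "random forest as a special case" remark concrete, the sketch below fits the same forest with axis-aligned and with oblique projections. It assumes, per the package documentation, that the built-in rotation functions `"RotMatRF"` (axis-aligned, i.e., classical random forest splits) and `"RotMatPPO"` (oblique projection pursuit) can be selected through the `NodeRotateFun` argument; consult `vignette("ODRF")` for the exact contract a user-defined projection function must follow.

```r
library(ODRF)
data(seeds, package = "ODRF")
seeds <- data.frame(seeds)

# Axis-aligned projections: ODRF reduces to a classical random forest.
rf_like <- ODRF(varieties_of_wheat ~ ., seeds,
                NodeRotateFun = "RotMatRF", parallel = FALSE)

# Oblique projections: each node splits on a learned linear combination.
oblique <- ODRF(varieties_of_wheat ~ ., seeds,
                NodeRotateFun = "RotMatPPO", parallel = FALSE)
```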
You can install the development version of ODRF from GitHub with:
# install.packages("devtools")devtools::install_github("liuyu-star/ODRF")We show how to use the ODRF package with examples.
Examples of classification and regression using ODRF and ODT are as follows.
```r
library(ODRF)
#> Loading required package: partykit
#> Warning: package 'partykit' was built under R version 4.2.3
#> Loading required package: grid
#> Loading required package: libcoin
#> Loading required package: mvtnorm

# Classification with ODRF on the seeds data.
data(seeds, package = "ODRF")
set.seed(12)
train <- sample(1:209, 150)
seeds_train <- data.frame(seeds[train, ])
seeds_test <- data.frame(seeds[-train, ])
forest <- ODRF(varieties_of_wheat ~ ., seeds_train, split = "gini", parallel = FALSE)
pred <- predict(forest, seeds_test[, -8])
(e.forest <- mean(pred != seeds_test[, 8]))
#> [1] 0.01694915

# Regression with ODT on the body_fat data.
data(body_fat, package = "ODRF")
train <- sample(1:252, 200)
bodyfat_train <- data.frame(body_fat[train, ])
bodyfat_test <- data.frame(body_fat[-train, ])
tree <- ODT(Density ~ ., bodyfat_train, split = "mse")
pred <- predict(tree, bodyfat_test[, -1])
(e.tree <- mean((pred - bodyfat_test[, 1])^2))
#> [1] 4.248171e-05

# Boosted ODTs with ODBT.
btree <- ODBT(Density ~ ., bodyfat_train, bodyfat_test[, -1], type = "reg",
              parallel = FALSE, model = "ODT", NodeRotateFun = "RotMatPPO")
pred <- btree$results$prediction
(e.btree <- mean((pred - bodyfat_test[, 1])^2))
#> [1] 3.718075e-05
```

In the following example, suppose the training data arrive in two batches. The first batch is used to train ODT and ODRF, and the second batch is used to update the models with `online()`. The error after the model update is significantly smaller than the error obtained from one batch of data alone.
Update existing ODT and ODRF with `online()`.
```r
set.seed(17)
# Train ODRF on the first half of the training data ...
index <- sample(nrow(seeds_train), floor(nrow(seeds_train) / 2))
forest1 <- ODRF(varieties_of_wheat ~ ., seeds_train[index, ], split = "gini", parallel = FALSE)
pred <- predict(forest1, seeds_test[, -8])
(e.forest.1 <- mean(pred != seeds_test[, 8]))
#> [1] 0.03389831
# ... then update it online with the second half.
forest2 <- online(forest1, seeds_train[-index, -8], seeds_train[-index, 8])
pred <- predict(forest2, seeds_test[, -8])
(e.forest.online <- mean(pred != seeds_test[, 8]))
#> [1] 0.01694915

# The same two-batch scheme for ODT.
index <- seq(floor(nrow(bodyfat_train) / 2))
tree1 <- ODT(Density ~ ., bodyfat_train[index, ], split = "mse")
pred <- predict(tree1, bodyfat_test[, -1])
(e.tree.1 <- mean((pred - bodyfat_test[, 1])^2))
#> [1] 5.057853e-05
tree2 <- online(tree1, bodyfat_train[-index, -1], bodyfat_train[-index, 1])
pred <- predict(tree2, bodyfat_test[, -1])
(e.tree.online <- mean((pred - bodyfat_test[, 1])^2))
#> [1] 5.065922e-05
```

`prune()` works from the last leaf nodes upward, removing a split only if doing so reduces the prediction error on the new data. For ODRF, if the argument `useOOB = TRUE` is set, the 'out-of-bag' samples are used for pruning instead. Examples are as follows.
```r
set.seed(4)
# Augment both training sets with pure-noise observations so that the
# fitted trees are larger than the signal warrants and pruning has an effect.
bodyfat_train <- rbind(as.matrix(bodyfat_train), matrix(rnorm(3000 * 5), 5 * 200, 15))
seeds_train <- rbind(as.matrix(seeds_train), matrix(rnorm(1200 * 5), 5 * 150, 8))
bodyfat_train[-seq(200), 1] <- sample(bodyfat_train[seq(200), 1], 5 * 200, replace = TRUE)
seeds_train[-seq(150), 8] <- sample(seeds_train[seq(150), 8], 5 * 150, replace = TRUE)

# Prune ODRF with held-out data ...
index <- sample(nrow(seeds_train), floor(nrow(seeds_train) / 2))
forest1 <- ODRF(seeds_train[index, -8], seeds_train[index, 8], split = "gini", parallel = FALSE)
pred <- predict(forest1, seeds_test[, -8])
(e.forest.1 <- mean(pred != seeds_test[, 8]))
#> [1] 0.1016949
forest2 <- prune(forest1, seeds_train[-index, -8], seeds_train[-index, 8], useOOB = FALSE)
pred <- predict(forest2, seeds_test[, -8])
(e.forest.prune1 <- mean(pred != seeds_test[, 8]))
#> [1] 0.08474576
# ... or with the out-of-bag samples of the training data.
forest3 <- prune(forest1, seeds_train[index, -8], seeds_train[index, 8])
pred <- predict(forest3, seeds_test[, -8])
(e.forest.prune2 <- mean(pred != seeds_test[, 8]))
#> [1] 0.08474576

# Prune ODT with held-out data.
index <- sample(nrow(bodyfat_train), floor(nrow(bodyfat_train) / 2))
tree1 <- ODT(bodyfat_train[index, -1], bodyfat_train[index, 1], split = "mse")
pred <- predict(tree1, bodyfat_test[, -1])
(e.tree.1 <- mean((pred - bodyfat_test[, 1])^2))
#> [1] 0.0001275841
tree2 <- prune(tree1, bodyfat_train[-index, -1], bodyfat_train[-index, 1])
pred <- predict(tree2, bodyfat_test[, -1])
(e.tree.prune <- mean((pred - bodyfat_test[, 1])^2))
#> [1] 7.589647e-05
```

Note that `prune()` does not always improve accuracy: when the number of observations in the training set is small, the fitted tree structure is already simple and there is little to prune away. That is why we expanded the training sets with random noise above, to make pruning effective.
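To see what pruning actually removed, the fitted trees can be printed before and after (a usage sketch reusing the objects above):

```r
# On the noise-augmented data the pruned tree should report
# fewer nodes than the tree it was derived from.
print(tree1)  # ODT fitted on one half of the augmented data
print(tree2)  # the same ODT after pruning with the held-out half
```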
Print the structure of a fitted ODT and ODRF, and convert an ODT to a `party` object:

```r
data(iris, package = "datasets")
tree <- ODT(Species ~ ., data = iris)
#> Warning in ODT_compute(formula, Call, varName, X, y, Xsplit, split, lambda, :
#> You are creating a tree for classification
print(tree)
#>
#> =============================================================
#> Oblique Classification Tree structure
#> =============================================================
#>
#> 1) root
#>    node2)# proj1*X < 0.29 -> (leaf1 = setosa)
#>    node3)  proj1*X >= 0.29
#>       node4)# proj2*X < 0.52 -> (leaf2 = versicolor)
#>       node5)# proj2*X >= 0.52 -> (leaf3 = virginica)

party.tree <- as.party(tree, data = iris)
print(party.tree)
#>
#> Model formula:
#> Species ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width
#>
#> Fitted party:
#> [1] root
#> |   [2] proj1*X >= 0.29165
#> |   |   [3] proj2*X >= 0.52235: virginica (n = 54, err = 7.4%)
#> |   |   [4] proj2*X < 0.52235: versicolor (n = 46, err = 0.0%)
#> |   [5] proj1*X < 0.29165: setosa (n = 50, err = 0.0%)
#>
#> Number of inner nodes:    2
#> Number of terminal nodes: 3

forest <- ODRF(Species ~ ., data = iris, parallel = FALSE)
#> Warning in ODRF_compute(formula, Call, varName, X, y, split, lambda,
#> NodeRotateFun, : You are creating a forest for classification
print(forest)
#>
#> Call:
#>  ODRF.formula(formula = Species ~ ., data = data, parallel = FALSE)
#>                Type of oblique decision random forest: classification
#>                                       Number of trees: 100
#>                            OOB estimate of error rate: 4.67%
#> Confusion matrix:
#>            setosa versicolor virginica class_error
#> setosa         50          0         0  0.00000000
#> versicolor      0         47         4  0.07843122
#> virginica       0          3        46  0.06122436

plot(tree)
```
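Since `as.party()` returns a `partykit` object, the converted tree can also be rendered with partykit's own plotting method, as a complement to the native `plot(tree)` above (a usage sketch):

```r
# Draw the converted tree using partykit's plot method.
plot(party.tree)
```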
If you encounter a clear bug, please file an issue with a minimal reproducible example on GitHub.
Please note that this project is released with a Contributor Code of Conduct. By participating in this project you agree to abide by its terms.