Informally test our various CV functions on theISLR::Auto dataset using SRS-CV, clustered-CV, and stratified-CV.
(We are arbitrarily using year as either clusterID or stratumID.)
Just run the functions on a somewhat arbitrary dataset to ensure that the output format is what we’re looking for, and that the output is roughly consistent across the different functions that are doing the same thing.
tests directory, not a vignette.surveyCV appropriately on a real complex-survey dataset?library(surveyCV)library(survey)library(ISLR)data(Auto)with(Auto,plot(horsepower, mpg,pch ='.'))cv.svy()# Test the general cv.svy() function# The first line's results are usually around:# linear lm: mean=24.3, se=1.45# quadratic lm: mean=19.3, se=1.38cv.svy(Auto,c("mpg~poly(horsepower,1, raw = TRUE)","mpg~poly(horsepower,2, raw = TRUE)"),nfolds =10)## mean SE## .Model_1 24.071 1.8454## .Model_2 19.173 1.7553cv.svy(Auto,c("mpg~poly(horsepower,1,raw=TRUE)","mpg~poly(horsepower,2,raw=TRUE)"),nfolds =10,clusterID ="year")## mean SE## .Model_1 27.271 4.6104## .Model_2 21.184 3.8695cv.svy(Auto,c("mpg~poly(horsepower,1, raw = TRUE)","mpg~poly(horsepower,2, raw = TRUE)"),nfolds =10,strataID ="year")## mean SE## .Model_1 24.110 1.7364## .Model_2 19.114 1.6673cv.svydesign()# Test the cv.svydesign() function, which takes a svydesign object and formulasauto.srs.svy<-svydesign(ids =~0,data = Auto)## Warning in svydesign.default(ids = ~0, data = Auto): No weights or probabilities## supplied, assuming equal probabilityauto.clus.svy<-svydesign(ids =~year,data = Auto)## Warning in svydesign.default(ids = ~year, data = Auto): No weights or## probabilities supplied, assuming equal probabilityauto.strat.svy<-svydesign(ids =~0,strata =~year,data = Auto)## Warning in svydesign.default(ids = ~0, strata = ~year, data = Auto): No weights## or probabilities supplied, assuming equal probabilitycv.svydesign(formulae =c("mpg~poly(horsepower,1, raw = TRUE)","mpg~poly(horsepower,2, raw = TRUE)","mpg~poly(horsepower,3, raw = TRUE)"),design_object = auto.srs.svy,nfolds =10)## mean SE## .Model_1 24.272 1.8579## .Model_2 19.276 1.7724## .Model_3 19.378 1.8126cv.svydesign(formulae =c("mpg~poly(horsepower,1, raw = TRUE)","mpg~poly(horsepower,2, raw = TRUE)","mpg~poly(horsepower,3, raw = TRUE)"),design_object = auto.clus.svy,nfolds =10)## mean SE## .Model_1 26.963 4.6224## .Model_2 20.740 3.8867## .Model_3 20.789 3.8934cv.svydesign(formulae =c("mpg~poly(horsepower,1, raw = TRUE)","mpg~poly(horsepower,2, raw = TRUE)","mpg~poly(horsepower,3, raw = TRUE)"),design_object = auto.strat.svy,nfolds =10)## mean SE## .Model_1 24.041 1.7297## .Model_2 19.108 1.6763## .Model_3 19.130 1.7104cv.svyglm()# Test the cv.svyglm() function, which takes one svyglm objectsrs.model<-svyglm(mpg~horsepower+I(horsepower^2)+I(horsepower^3),design = auto.srs.svy)clus.model<-svyglm(mpg~horsepower+I(horsepower^2)+I(horsepower^3),design = auto.clus.svy)strat.model<-svyglm(mpg~horsepower+I(horsepower^2)+I(horsepower^3),design = auto.strat.svy)cv.svyglm(glm = srs.model,nfolds =10)## mean SE## .Model_1 19.43 1.8208cv.svyglm(glm = clus.model,nfolds =10)## mean SE## .Model_1 20.625 3.8178cv.svyglm(glm = strat.model,nfolds =10)## mean SE## .Model_1 19.279 1.7197seed=20210708set.seed(seed)cv.svy(Auto,"mpg~horsepower+I(horsepower^2)+I(horsepower^3)",nfolds =10)## mean SE## .Model_1 19.466 1.8254set.seed(seed)cv.svydesign(formulae ="mpg~horsepower+I(horsepower^2)+I(horsepower^3)",design_object = auto.srs.svy,nfolds =10)## mean SE## .Model_1 19.466 1.8254srs.model<-svyglm(mpg~horsepower+I(horsepower^2)+I(horsepower^3),design = auto.srs.svy)set.seed(seed)cv.svyglm(glm = srs.model,nfolds =10)## mean SE## .Model_1 19.466 1.8254Auto$isAmerican<- Auto$origin==1with(Auto,plot(horsepower, isAmerican))cv.svy(Auto,c("isAmerican~horsepower","isAmerican~horsepower+I(horsepower^2)","isAmerican~horsepower+I(horsepower^2)+I(horsepower^3)"),nfolds =10,method ="logistic")## mean SE## .Model_1 0.50118 0.0231## .Model_2 0.50184 0.0233## .Model_3 0.49620 0.0245# Should stop early because method isn't linear or logistictry(cv.svy(Auto,"mpg~horsepower+I(horsepower^2)+I(horsepower^3)",nfolds =10,method ="abcde"))## Error in match.arg(method) : 'arg' should be one of "linear", "logistic"# Should try to run but crash because response variable isn't 0/1try(cv.svy(Auto,"mpg~horsepower+I(horsepower^2)+I(horsepower^3)",nfolds =10,method ="logistic"))## Error in eval(family$initialize) : y values must be 0 <= y <= 1