This vignette describes how to use GSCUSUM charts, an extension to standard CUSUM charts for binary performance data grouped in samples of unequal size.
Following information have to be available:
dplyr::group_indices())These information are collected in a numeric matrix.
Non-risk-adjusted example data:
head(gscusum_example_data)#> # A tibble: 6 x 4#> t y year block_identifier#> <int> <lgl> <dbl> <int>#> 1 1 FALSE 2016 1#> 2 2 FALSE 2016 1#> 3 3 FALSE 2016 1#> 4 4 FALSE 2016 1#> 5 5 FALSE 2016 1#> 6 6 FALSE 2016 1Risk-adjusted example data:
Like in the standard CUSUM chart (see vignette for CUSUM charts), parameters have to be estimated in order to set up the charts,
failure_probability <-mean(gscusum_example_data$y[gscusum_example_data$year==2016])n_patients <-nrow(gscusum_example_data[gscusum_example_data$year==2016,])and control limits have to be estimated:
cusum_limit <-cusum_limit_sim(failure_probability, n_patients,odds_multiplier =2,n_simulation =1000,alpha =0.05,seed =2046)print(cusum_limit)#> [1] 4.91128GSCUSUM charts are constructed on performance data from 2017.
gscusum_data <-gscusum_example_data[gscusum_example_data$year==2017,]input_outcomes <-matrix(c(gscusum_data$y, gscusum_data$block_identifier),ncol =2)gcs <-gscusum(input_outcomes = input_outcomes,failure_probability = failure_probability,odds_multiplier =2,limit = cusum_limit,max_num_shuffles =1000,quantiles =c(0.,0.05,0.25,0.5,0.75,.95,1))This function returns the signal probability, average CUSUM values and quantiles of the CUSUM distribution specified in the function call.
gcs <-as.data.frame(gcs)names(gcs) <-c("sig_prob","avg","min","q05","q25","median","q75","q95","max")head(gcs)#> sig_prob avg min q05 q25 median q75 q95 max#> 1 0 0.08151062 0 0 0 0.00000000 0.0000000 0.4910278 0.4910278#> 2 0 0.13900012 0 0 0 0.00000000 0.2889099 0.4910278 0.9820557#> 3 0 0.15609100 0 0 0 0.00000000 0.2889099 0.7799377 0.9820557#> 4 0 0.17359918 0 0 0 0.00000000 0.2889099 0.7799377 0.9820557#> 5 0 0.19099323 0 0 0 0.00000000 0.3757019 0.7799377 0.9820557#> 6 0 0.19285629 0 0 0 0.08679199 0.3757019 0.5778198 0.9820557gcs$block_identifier <-input_outcomes[,2]gcs$t <-seq(1,nrow(gcs))col1 <- "#f7ba02"col2 <- "#4063bc"palette <-rep(c(col1, col2),300)ggplot()+geom_line(data = gcs,aes(x = t,y = sig_prob))+geom_point(data = gcs,aes(x = t,y = sig_prob,col =as.factor(block_identifier) ))+scale_color_manual(guide=FALSE,values = palette)+scale_y_continuous(name ="Signal Probability",limits =c(0,1))+theme_bw()The complete run can be plotted with:
nblock <-max(gcs$block_identifier)p <-ggplot(gcs)for ( iin1:nblock){ dblock <-gcs[gcs$block_identifier==i,] col <-ifelse(i%%2==0,col2,col1) dblock_before <-dblock[1,] dblock_before$t <-dblock_before$t-.5 dblock_after <-dblock[nrow(dblock),] dblock_after$t <-dblock_after$t+.5 dblock_n <-rbind(dblock, dblock_before, dblock_after) p <-p+geom_ribbon(data = dblock_n,aes(x = t,ymin = min,ymax = max),fill = col,alpha =0.2)+geom_ribbon(data = dblock_n,aes(x = t,ymin = q05,ymax = q95),fill = col,alpha =0.2)+geom_ribbon(data = dblock_n,aes(x = t,ymin = q25,ymax = q75),fill = col,alpha =0.2)}p <-p+geom_line(data = gcs,aes(x = t,y = median))+geom_point(data = gcs,aes(x = t,y = median,fill =as.factor(block_identifier)),size=2,pch =21)+geom_hline(aes(yintercept = cusum_limit),linetype =2)+theme_bw()+scale_y_continuous(name ="CUSUM Distribution")+scale_x_continuous(name ="Sequence of Observations")+scale_fill_manual(values = palette,guide =FALSE)+labs(subtitle ="GSCUSUM")pLike in the standard RA-CUSUM chart (see vignette for CUSUM charts), parameters are estimated in order to set up the charts,
and control limits are set:
racusum_limit <-racusum_limit_sim(patient_risks = ragscusum_example_data$score[ragscusum_example_data$year==2016],odds_multiplier =2,n_simulation =1000,alpha =0.05,seed =2046)print(racusum_limit)#> [1] 2.403465GSCUSUM charts are constructed on performance data from 2017.
ragscusum_data <-ragscusum_example_data[ragscusum_example_data$year==2017,]input_outcomes <-matrix(c(gscusum_data$y, gscusum_data$block_identifier),ncol =2)gcs <-gscusum(input_outcomes = input_outcomes,failure_probability = failure_probability,odds_multiplier =2,limit = cusum_limit,max_num_shuffles =1000,quantiles =c(0.,0.05,0.25,0.5,0.75,.95,1))This function returns the signal probability, average CUSUM values and quantiles of the CUSUM distribution specified in the function call.
gcs <-as.data.frame(gcs)names(gcs) <-c("sig_prob","avg","min","q05","q25","median","q75","q95","max")head(gcs)#> sig_prob avg min q05 q25 median q75 q95 max#> 1 0 0.07905548 0 0 0 0.00000000 0.0000000 0.4910278 0.4910278#> 2 0 0.12831284 0 0 0 0.00000000 0.2889099 0.4910278 0.9820557#> 3 0 0.16331018 0 0 0 0.00000000 0.2889099 0.7799377 0.9820557#> 4 0 0.17408902 0 0 0 0.00000000 0.2889099 0.5778198 0.9820557#> 5 0 0.19183142 0 0 0 0.00000000 0.3757019 0.7799377 0.9820557#> 6 0 0.19487628 0 0 0 0.08679199 0.3757019 0.5778198 0.9820557gcs$block_identifier <-input_outcomes[,2]gcs$t <-seq(1,nrow(gcs))col1 <- "#f7ba02"col2 <- "#4063bc"palette <-rep(c(col1, col2),300)ggplot()+geom_line(data = gcs,aes(x = t,y = sig_prob))+geom_point(data = gcs,aes(x = t,y = sig_prob,col =as.factor(block_identifier) ))+scale_color_manual(guide=FALSE,values = palette)+scale_y_continuous(name ="Signal Probability",limit =c(0,1))+theme_bw()The complete run can be plotted with:
nblock <-max(gcs$block_identifier)p <-ggplot(gcs)for ( iin1:nblock){ dblock <-gcs[gcs$block_identifier==i,] col <-ifelse(i%%2==0,col2,col1) dblock_before <-dblock[1,] dblock_before$t <-dblock_before$t-.5 dblock_after <-dblock[nrow(dblock),] dblock_after$t <-dblock_after$t+.5 dblock_n <-rbind(dblock, dblock_before, dblock_after) p <-p+geom_ribbon(data = dblock_n,aes(x = t,ymin = min,ymax = max),fill = col,alpha =0.2)+geom_ribbon(data = dblock_n,aes(x = t,ymin = q05,ymax = q95),fill = col,alpha =0.2)+geom_ribbon(data = dblock_n,aes(x = t,ymin = q25,ymax = q75),fill = col,alpha =0.2)}p <-p+geom_line(data = gcs,aes(x = t,y = median))+geom_point(data = gcs,aes(x = t,y = median,fill =as.factor(block_identifier)),size=2,pch =21)+geom_hline(aes(yintercept = cusum_limit),linetype =2)+theme_bw()+scale_y_continuous(name ="RACUSUM Distribution")+scale_x_continuous(name ="Sequence of Observations")+scale_fill_manual(values = palette,guide =FALSE)+labs(subtitle ="RA-GSCUSUM")p