Movatterモバイル変換


[0]ホーム

URL:


tsdataleaks

R package for detecting data leakages in time series forecasting competitions.

Installation

You can install the development version from GitHub with:

# install.packages("devtools")devtools::install_github("thiyangt/tsdataleaks")library(tsdataleaks)

Example

To demonstrate the package functions, I created a small data set with 5 time series (a, b, c, d and e).

set.seed(2020)a<-rnorm(15)d<-rnorm(10)lst<-list(a = a,b =c(a[10:15]+rep(8,6),rnorm(10), a[1:5], a[1:5]),c =c(rnorm(10),-a[1:5]),d = d,e = d)

find_dataleaks: Exploit data leaks

library(tsdataleaks)library(magrittr)library(tidyverse)library(viridis)# h - I assume test period length is 5 and took that as wind size, h.f1<-find_dataleaks(lstx = lst,h=5,cutoff=1)f1$a  .id start end2   b26$b  .id start end1   a152   b17214   c1115$c  .id start end1   a152   b17213   b2226$d  .id start end5   e610$e  .id start end4   d610

Interpretation: The first element in the list means that the last 5 observations of time series a correlate with observations 2 to 6 of time series b.

viz_dataleaks: Visualise the data leaks
viz_dataleaks(f1)[[1]]

[[2]][[2]]$a  .id start end2   b     2   6[[2]]$b  .id start end1   a     1   52   b    17  214   c    11  15[[2]]$c  .id start end1   a     1   52   b    17  213   b    22  26[[2]]$d  .id start end5   e     6  10[[2]]$e  .id start end4   d     6  10

reason_dataleaks

Display the reasons for the data leaks and evaluate how useful each data leak is for winning the competition.

r1<-reason_dataleaks(lstx = lst,finddataleaksout = f1,h=5)r1[[1]]  series1 .id start end dist_mean dist_sd is.useful.leak dist_cor1       a   b26-8.00.0         useful12       b   a150.00.0         useful13       b   b17210.00.0         useful14       b   c1115-1.72.6     not useful-15       c   a151.72.6         useful-16       c   b17211.72.6         useful-17       c   b22261.72.6     not useful-18       d   e6100.00.0     not useful19       e   d6100.00.0     not useful1                                     reason1                              add constant2                               exact match3                               exact match4 multiply by-1 or negative constant value5 multiply by-1 or negative constant value6 multiply by-1 or negative constant value7 multiply by-1 or negative constant value8                               exact match9                               exact match[[2]]

A list without named elements

a=rnorm(15)lst<-list(  a,c(a[10:15],rnorm(10), a[1:5], a[1:5]),c(rnorm(10), a[1:5]))f1<-find_dataleaks(lst,h=5)
viz_dataleaks(f1)#> [[1]]

#> #> [[2]]#> [[2]]$`1`#>   .id start end#> 2   2     2   6#> #> [[2]]$`2`#>   .id start end#> 1   1     1   5#> 2   2    17  21#> 4   3    11  15#> #> [[2]]$`3`#>   .id start end#> 1   1     1   5#> 2   2    17  21#> 3   2    22  26
reason_dataleaks(lst, f1,h=5)#> [[1]]#>   series1 .id start end dist_mean dist_sd is.useful.leak dist_cor      reason#> 1       1   2     2   6         0       0         useful        1 exact match#> 2       2   1     1   5         0       0         useful        1 exact match#> 3       2   2    17  21         0       0         useful        1 exact match#> 4       2   3    11  15         0       0     not useful        1 exact match#> 5       3   1     1   5         0       0         useful        1 exact match#> 6       3   2    17  21         0       0         useful        1 exact match#> 7       3   2    22  26         0       0     not useful        1 exact match#>#> [[2]]

Application to M-Competition data

M1 Competition - Yearly data

library(Mcomp)data("M1")M1Y<-subset(M1,"yearly")M1Y_x<-lapply(M1Y,function(temp){temp$x})m1y_f1<-find_dataleaks(M1Y_x,h=6,cutoff =1)m1y_f1#> $YAF17#>     .id start end#> 22 YAM6     9  14#>#> $YAM6#>      .id start end#> 16 YAF17    16  21#>#> $YAM28#>      .id start end#> 78 YAI21    16  21#>#> $YAB3#>     .id start end#> 18 YAM2    14  19#>#> $YAB4#>     .id start end#> 17 YAM1    15  20#>#> $YAI21#>      .id start end#> 43 YAM28    16  21#>#> $YAG29#>       .id start end#> 137 YAC15     6  11
viz_dataleaks(m1y_f1)#> [[1]]

#> #> [[2]]#> [[2]]$YAF17#>     .id start end#> 22 YAM6     9  14#> #> [[2]]$YAM6#>      .id start end#> 16 YAF17    16  21#> #> [[2]]$YAM28#>      .id start end#> 78 YAI21    16  21#> #> [[2]]$YAB3#>     .id start end#> 18 YAM2    14  19#> #> [[2]]$YAB4#>     .id start end#> 17 YAM1    15  20#> #> [[2]]$YAI21#>      .id start end#> 43 YAM28    16  21#> #> [[2]]$YAG29#>       .id start end#> 137 YAC15     6  11
reason_dataleaks(M1Y_x, m1y_f1,h=6,ang=90)#> [[1]]#>   series1   .id start end dist_mean dist_sd is.useful.leak dist_cor#> 1   YAF17  YAM6     9  14       5.4     0.4     not useful        1#> 2    YAM6 YAF17    16  21      -5.4     0.4     not useful        1#> 3   YAM28 YAI21    16  21       0.0     0.0     not useful        1#> 4    YAB3  YAM2    14  19       0.0     0.0         useful        1#> 5    YAB4  YAM1    15  20       0.0     0.0         useful        1#> 6   YAI21 YAM28    16  21       0.0     0.0     not useful        1#> 7   YAG29 YAC15     6  11  -36815.7  6159.2         useful        1#>                 reason#> 1 other transformation#> 2 other transformation#> 3          exact match#> 4          exact match#> 5          exact match#> 6          exact match#> 7 other transformation#>#> [[2]]


[8]ページ先頭

©2009-2025 Movatter.jp