@@ -63,107 +63,88 @@ get_team_abrev_footywire <- function(team) {
6363# ' @keywords internal
6464# ' @noRd
footywire_html <- function(x, id) {
  # Parse one footywire match page into a player-level stats table.
  #
  # x:  parsed HTML document for the match page (as consumed by
  #     rvest::html_node() / rvest::html_nodes()).
  # id: match id; stored unchanged in the `Match_id` column.
  #
  # Returns a tibble with one row per player. Date, Season, Round, Venue,
  # Player, Team, Opposition and Status are moved to the front, followed by
  # the statistic columns converted to numeric. Columns that are entirely
  # empty are dropped and names are passed through make.names().

  # Columns that identify a row rather than hold a statistic; they are
  # excluded from the numeric conversion.
  id_cols <- c("Player", "Team", "Opposition", "Status")

  # Positions of the home and away player tables among all <table> nodes.
  home_table_index <- 13L
  away_table_index <- 18L

  # Trimmed text of the first node matching `css`.
  node_text <- function(css) {
    rvest::html_text(rvest::html_node(x, css), trim = TRUE)
  }

  # The n-th comma-separated field of `txt`, trimmed (NA when absent).
  comma_field <- function(txt, n) {
    trimws(strsplit(txt, ",", fixed = TRUE)[[1]][n])
  }

  # Make sure the player-name column is called "Player" before any
  # across()/select() call refers to it by name.
  standardise_player_col <- function(df) {
    if (!is.data.frame(df) || nrow(df) == 0L || ncol(df) == 0L) {
      return(df)
    }
    # Headers can carry non-breaking spaces, which trimws() does not strip.
    names(df) <- trimws(stringr::str_replace_all(names(df), "\u00A0", " "))
    cand <- which(
      tolower(names(df)) %in% c("player", "name", "players", "player_name")
    )
    # Fall back to the first column when no header looks like a player name.
    target <- if (length(cand) > 0L) cand[1] else 1L
    names(df)[target] <- "Player"
    df
  }

  # Look the tables up once; both team tables come from the same node set.
  tables <- rvest::html_nodes(x, "table")

  # Read the table at `index` and tidy it into typed player statistics.
  team_stats <- function(index, team_name, opposition_name, status) {
    if (length(tables) < index) {
      stop(
        "Expected at least ", index, " tables on the match page but found ",
        length(tables), ".",
        call. = FALSE
      )
    }
    tables[[index]] %>%
      rvest::html_table(header = TRUE) %>%
      tibble::as_tibble() %>%
      janitor::remove_empty("cols") %>%
      standardise_player_col() %>%
      dplyr::mutate(
        Team = team_name,
        Opposition = opposition_name,
        Status = status
      ) %>%
      dplyr::mutate(dplyr::across(
        dplyr::where(is.character),
        ~ dplyr::na_if(.x, "Unused Substitute")
      )) %>%
      # Strip anything that is not a digit, dot or minus before converting;
      # values that still fail to parse become NA without a warning.
      dplyr::mutate(dplyr::across(
        -tidyselect::any_of(id_cols),
        ~ suppressWarnings(as.numeric(gsub("[^0-9.-]", "", .x)))
      ))
  }

  # Round and venue are the first two comma-separated fields of one cell.
  game_details <- node_text("tr:nth-child(2) .lnorm")
  round_name <- comma_field(game_details, 1L)
  venue <- comma_field(game_details, 2L)

  # The date is the second comma-separated field of the next cell.
  game_details_date <- node_text(".lnormtop tr:nth-child(3) .lnorm")
  game_date <- lubridate::dmy(comma_field(game_details_date, 2L))
  season <- lubridate::year(game_date)

  home_team <- node_text("#matchscoretable tr:nth-child(2) a")
  away_team <- node_text("#matchscoretable tr~ tr+ tr a")

  home_stats <- team_stats(home_table_index, home_team, away_team, "Home")
  away_stats <- team_stats(away_table_index, away_team, home_team, "Away")

  player_stats <- dplyr::bind_rows(home_stats, away_stats) %>%
    dplyr::mutate(
      Round = round_name,
      Venue = venue,
      Season = season,
      Date = game_date,
      Match_id = id
    ) %>%
    # relocate() moves the selection to the front by default.
    dplyr::relocate(
      dplyr::all_of(c("Date", "Season", "Round", "Venue", id_cols))
    ) %>%
    # Drops columns that ended up entirely NA after the conversions above.
    janitor::remove_empty("cols")

  names(player_stats) <- make.names(names(player_stats))
  player_stats
}
164147
165-
166-
167148# ' Helper function for \code{get_footywire_stats}
168149# '
169150# ' @param id A match id from afltables