# Register names that are referenced without a visible binding (columns used
# through non-standard evaluation and helpers called without a namespace
# prefix) so that R CMD check does not report them as undefined globals.
utils::globalVariables(
  c(
    "Team", "type", "Home.Team", "Away.Team", "Games",
    "where", "all_of", "setNames"
  )
)

#' Fetch Team Statistics from AFLTables
#'
#' Scrapes the team "for" and "against" statistics tables (the second and
#' third tables on the season page) from AFLTables.com and combines them into
#' one row per team.
#'
#' @param season A single whole number between 1965 and 2025.
#' @param summary_type Character. Either `"totals"` (default) or `"averages"`.
#'   With `"averages"`, every statistic column is divided by the number of
#'   games the team played, counted from
#'   `fitzRoy::fetch_results_afltables()`.
#'
#' @return A data frame with one row per team containing `season`, `Team`,
#'   and each statistic in `_for`, `_against`, and `_diff` format, where
#'   `_diff` is `_for` minus `_against`. When `summary_type = "averages"` a
#'   `Games` column is appended.
#' @export
#'
#' @examples
#' \dontrun{
#' fetch_team_stats(2024)
#' fetch_team_stats(2023, summary_type = "averages")
#' }
fetch_team_stats <- function(season, summary_type = "totals") {
  # Validate everything up front so bad input fails before any network call.
  # The length/NA checks come first so the scalar comparisons are always
  # well-defined.
  season_ok <- is.numeric(season) &&
    length(season) == 1L &&
    !is.na(season) &&
    season == round(season) &&
    season >= 1965 &&
    season <= 2025
  if (!season_ok) {
    stop("Season must be a numeric value between 1965 and 2025.")
  }
  # Reject typos instead of silently falling back to totals.
  summary_type <- match.arg(summary_type, c("totals", "averages"))

  team_name_map <- c(
    "Adelaide" = "Adelaide",
    "Brisbane Lions" = "Brisbane Lions",
    "Carlton" = "Carlton",
    "Collingwood" = "Collingwood",
    "Essendon" = "Essendon",
    "Fremantle" = "Fremantle",
    "Geelong" = "Geelong",
    "Gold Coast" = "Gold Coast",
    "Greater Western Sydney" = "GWS",
    "Hawthorn" = "Hawthorn",
    "Melbourne" = "Melbourne",
    "North Melbourne" = "North Melbourne",
    "Port Adelaide" = "Port Adelaide",
    "Richmond" = "Richmond",
    "St Kilda" = "St Kilda",
    "Sydney" = "Sydney",
    "West Coast" = "West Coast",
    "Footscray" = "Western Bulldogs" # Standardize older data
  )

  # Map known names to their standard form; names not in the map are kept.
  standardise_team <- function(x) {
    x <- as.character(x)
    hit <- match(x, names(team_name_map))
    found <- !is.na(hit)
    x[found] <- unname(team_name_map[hit[found]])
    x
  }

  url <- paste0("https://afltables.com/afl/stats/", season, "s.html")
  page <- rvest::read_html(url)
  tables <- rvest::html_elements(page, "table")
  if (length(tables) < 3) {
    stop("Insufficient tables found on the page for season: ", season)
  }

  # Tables 2 and 3 hold the "for" and "against" team figures respectively.
  team_totals_for <- rvest::html_table(tables[[2]], fill = TRUE)
  team_totals_against <- rvest::html_table(tables[[3]], fill = TRUE)

  colnames(team_totals_for)[1] <- "Team"
  colnames(team_totals_against)[1] <- "Team"
  team_totals_for$type <- "for"
  team_totals_against$type <- "against"

  team_stats <- dplyr::bind_rows(team_totals_for, team_totals_against) %>%
    dplyr::filter(Team != "Totals") %>%
    dplyr::mutate(dplyr::across(-c(Team, type), as.numeric))

  team_stats$Team <- standardise_team(team_stats$Team)

  # One row per team, with every statistic split into <stat>_for and
  # <stat>_against columns.
  team_stats_wide <- team_stats %>%
    tidyr::pivot_wider(
      names_from = type,
      values_from = -c(Team, type),
      names_sep = "_"
    )

  for_cols <- grep("_for$", names(team_stats_wide), value = TRUE)
  against_cols <- gsub("_for$", "_against", for_cols)
  diff_cols <- gsub("_for$", "_diff", for_cols)

  # <stat>_diff = <stat>_for - <stat>_against, column by column.
  diff_list <- stats::setNames(
    Map(
      function(f, a) team_stats_wide[[f]] - team_stats_wide[[a]],
      for_cols,
      against_cols
    ),
    diff_cols
  )

  team_stats_final <- team_stats_wide %>%
    dplyr::bind_cols(tibble::as_tibble(diff_list)) %>%
    dplyr::mutate(season = season) %>%
    dplyr::relocate(season, .before = Team)

  if (summary_type == "averages") {
    # Count games per team from the season's results. Team names are
    # standardised with the same map as the stats so the join keys agree.
    game_counts <- fitzRoy::fetch_results_afltables(season) %>%
      dplyr::filter(!is.na(Home.Team), !is.na(Away.Team)) %>%
      tidyr::pivot_longer(
        cols = c(Home.Team, Away.Team),
        names_to = "HomeAway",
        values_to = "Team"
      ) %>%
      dplyr::mutate(Team = standardise_team(Team)) %>%
      dplyr::count(Team, name = "Games")

    team_stats_final <- dplyr::left_join(
      team_stats_final,
      game_counts,
      by = "Team"
    )

    # Surface unmatched teams rather than silently returning NA averages.
    unmatched <- team_stats_final$Team[is.na(team_stats_final$Games)]
    if (length(unmatched) > 0) {
      warning(
        "No game count found for: ",
        paste(unmatched, collapse = ", "),
        "; their averages will be NA.",
        call. = FALSE
      )
    }

    # Every numeric column except the identifiers and the divisor itself.
    is_num <- vapply(team_stats_final, is.numeric, logical(1))
    numeric_cols <- setdiff(
      names(team_stats_final)[is_num],
      c("season", "Team", "Games")
    )

    team_stats_final <- team_stats_final %>%
      dplyr::mutate(dplyr::across(dplyr::all_of(numeric_cols), ~ .x / Games))
  }

  team_stats_final
}