|
| 1 | +--- |
| 2 | +title:"Zonal Statistics Report" |
| 3 | +output:html_document |
| 4 | +knit:(function(inputFile, encoding) { |
| 5 | +source("config.R"); |
| 6 | +rmarkdown::render( |
| 7 | +inputFile, |
| 8 | +encoding = encoding, |
| 9 | +output_file = OUTPUT_REPORT_NAME |
| 10 | +)}) |
| 11 | +--- |
| 12 | + |
| 13 | +```{r setup, include = FALSE} |
| 14 | +knitr::opts_chunk$set(echo = FALSE) |
| 15 | +
|
| 16 | +library(terra) |
| 17 | +library(exactextractr) |
| 18 | +
|
| 19 | +source("config.R") |
| 20 | +
|
| 21 | +if (!file.exists(RASTER_DATA_FILE)) { |
| 22 | + stop("File '", RASTER_DATA_FILE, "' does not exist! This should be a table with three columns describing the (formatted) variable name, the data source name of the file, and the number of digits to display after the decimal point in aggregate results.") |
| 23 | +} |
| 24 | +``` |
| 25 | + |
| 26 | +This report calculates user-specified[percentiles](https://en.wikipedia.org/wiki/Percentile) for raster data within polygons. |
| 27 | + |
| 28 | +Percentiles are an excellent way to describe range and central tendency for a dataset. Spatially-weighted percentiles, via the`exactextractr::exact_extract()` method, allow us to account for fractional overlap of polygons and individual raster cells. |
| 29 | + |
| 30 | +```{r calculate-statistics} |
| 31 | +# load and subset polygons |
| 32 | +mupolygon <- terra::vect(MU_DSN, MU_LAYER) |
| 33 | +mupolygon.sub <- mupolygon[which(mupolygon[[MU_COL]][[1]] %in% as.character(MU_SET)), ] |
| 34 | +mupolygon.sub$Acres <- terra::expanse(mupolygon.sub) / 4086.46 |
| 35 | +
|
| 36 | +raster.data <- read.csv(RASTER_DATA_FILE) |
| 37 | +
|
| 38 | +if (!"path" %in% colnames(raster.data)) { |
| 39 | + stop("File '", RASTER_DATA_FILE, "' must specify a column called 'path' containing the file path or GDAL data source specification for a raster source") |
| 40 | +} |
| 41 | +
|
| 42 | +if (!"variable" %in% colnames(raster.data)) { |
| 43 | + stop("File '", RASTER_DATA_FILE, "' must specify a column called 'variable' containing formatted names for each raster source") |
| 44 | +} |
| 45 | +
|
| 46 | +if (!"digits" %in% colnames(raster.data)) { |
| 47 | + stop("File '", RASTER_DATA_FILE, "' must specify a column called 'digits' containing the number of digits after the decimal place to use for rounding aggregate outputs") |
| 48 | +} |
| 49 | +
|
| 50 | +# create spatrasters |
| 51 | +spatraster.list <- lapply(raster.data$path, terra::rast) |
| 52 | +
|
| 53 | +# this aggregation is only needed if you want group statistics |
| 54 | +if (!BY_POLYGON) { |
| 55 | + mupolygon.group <- terra::aggregate(mupolygon.sub, MU_COL, fun = "sum") |
| 56 | +} else { |
| 57 | + mupolygon.group <- mupolygon.sub |
| 58 | +} |
| 59 | +
|
| 60 | +# prepare sf object and final attributes |
| 61 | +mupolygon.sf <- sf::st_as_sf(mupolygon.group) |
| 62 | +mupolygon.attr <- as.data.frame(mupolygon.sf)[MU_COL] |
| 63 | +
|
| 64 | +# name cleaning function (TODO: cover more edge cases) |
| 65 | +makeName <- function(x) { |
| 66 | + gsub("[^a-z0-9 ]", "_", make.names(tolower(x), unique = TRUE)) |
| 67 | +} |
| 68 | +
|
| 69 | +# extract and apply aggregation function |
| 70 | +sampling.time <- system.time({ |
| 71 | + res <- lapply(seq_len(length(spatraster.list)), function(i) { |
| 72 | + r <- spatraster.list[[i]] |
| 73 | + ressub <- do.call(exactextractr::exact_extract, c(list( |
| 74 | + x = r, |
| 75 | + y = sf::st_transform(mupolygon.sf, terra::crs(r)), |
| 76 | + fun = FUN, |
| 77 | + progress = FALSE, |
| 78 | + force_df = TRUE, |
| 79 | + weights = 'area' # TODO: allow custom weight raster? |
| 80 | + ), ARGS)) |
| 81 | + d <- raster.data[i, 'digits'] |
| 82 | + if (!is.na(d)) { |
| 83 | + ressub <- round(ressub, d) |
| 84 | + } |
| 85 | + colnames(ressub) <- paste0(makeName(names(r)), "_", names(ressub)) |
| 86 | + cbind(mupolygon.attr, ressub) |
| 87 | + }) |
| 88 | +}) |
| 89 | +names(res) <- raster.data$variable |
| 90 | +
|
| 91 | +mupolygon.attr$`Polygon Count` <- aggregate(seq_len(nrow(mupolygon.sub)), by = list(mupolygon.sub[[MU_COL]][[1]]), length)$x |
| 92 | +mupolygon.attr$`Total Acres` <- round(mupolygon.group$sum_Acres) |
| 93 | +
|
| 94 | +poly_acres <- aggregate(mupolygon.sub$Acres, by = list(mupolygon.sub[[MU_COL]][[1]]), |
| 95 | + function(x) setNames(round(quantile(x)), paste0("Q", c(0, 25, 50, 75, 100)))) |
| 96 | +colnames(poly_acres)[[1]] <- MU_COL |
| 97 | +colnames(poly_acres[[2]]) <- paste("Acres", colnames(poly_acres[[2]])) |
| 98 | +poly_summary <- cbind(mupolygon.attr, poly_acres[[2]]) |
| 99 | +``` |
| 100 | + |
| 101 | +##Input Data Summary |
| 102 | + |
| 103 | +Input polygon boundary groups from '`r MU_DSN`' '`r MU_LAYER`' are as follows: |
| 104 | + |
| 105 | +```{r polygon-table} |
| 106 | +knitr::kable(poly_summary, caption = "Polygon Area Summary") |
| 107 | +``` |
| 108 | + |
| 109 | +Rasters were summarized from the following data sources: |
| 110 | + |
| 111 | +```{r raster-table} |
| 112 | +knitr::kable(raster.data, caption = "Raster Data Sources") |
| 113 | +``` |
| 114 | + |
| 115 | +Completed raster extraction from`r sum(poly_summary[["Polygon Count"]])` polygons grouped by by "`r MU_COL`" in`r structure(sampling.time[3], class = "difftime", units = "secs")` seconds. |
| 116 | + |
| 117 | +##Results |
| 118 | + |
| 119 | +```{r display-html-tables, results='asis'} |
| 120 | +for (n in names(res)) |
| 121 | + print(knitr::kable(res[[n]], caption = paste("Zonal Statistics of", n, "by", MU_COL))) |
| 122 | +``` |
| 123 | + |
| 124 | +```{r write-wide-output, results='hide'} |
| 125 | +# create wide format output |
| 126 | +.merge <- \(x, y) merge(x, y, by = c("MUSYM", "Acres")) |
| 127 | +res2 <- Reduce(merge, res) |
| 128 | +
|
| 129 | +## too wide for html format |
| 130 | +# knitr::kable(res2, caption = paste("Zonal Statistics by", MU_COL)) |
| 131 | +
|
| 132 | +dir.create(OUTPUT_DIR, showWarnings = FALSE, recursive = TRUE) |
| 133 | +outfile <- normalizePath(file.path(OUTPUT_DIR, paste0(OUTPUT_BASENAME)), |
| 134 | + winslash = "/", |
| 135 | + mustWork = FALSE) |
| 136 | +foo <- write.csv(res2, paste0(outfile, ".csv"), row.names = FALSE) |
| 137 | +vct <- terra::writeVector(cbind(mupolygon.group, res2), paste0(outfile, ".gpkg")) |
| 138 | +``` |
| 139 | + |
| 140 | +Wide-format output written to file: |
| 141 | + |
| 142 | +-`r paste0(outfile, ".csv")` |
| 143 | +-`r paste0(outfile, ".gpkg")` |