diff --git a/R/plotting.R b/R/plotting.R index aefd4ed..440a2f0 100644 --- a/R/plotting.R +++ b/R/plotting.R @@ -1,4 +1,3 @@ -# TODO: add documentations for these functions #' Plotting theme for vimcheck #' #' @description @@ -17,6 +16,8 @@ #' #' @return A `ggplot2` theme that can be added to `ggplot2` plots or objects. #' +#' @keywords plotting +#' #' @export theme_vimc <- function(x_text_angle = 45, y_text_angle = 0, ...) { ggplot2::theme_bw() + @@ -44,45 +45,36 @@ theme_vimc <- function(x_text_angle = 45, y_text_angle = 0, ...) { #' Plotting functions for burden and impact diagnostics. This documentation #' holds all plotting functions for now. #' -#' @param d +#' @param data A `` that gives the comparison between VIMC-provided +#' and modeller-used demography values, in long-format. This is primarily +#' expected to be the output of `check_demography_alignment()` processed by +#' `prep_plot_demography()`. #' -#' @param fig_number +#' @param fig_number The figure number. #' #' @return A `` object that can be printed to screen in the plot frame #' or saved to an output device (i.e., saved as an image file). #' -#' @examples +#' @keywords plotting #' #' @export -plot_compare_demography <- function(d, fig_number) { - num_countries <- length(unique(d$country)) - names_melting_data <- c( - "scenario", - "age", - "year", - "expected", - "provided", - "difference" - ) - names_melting_by <- c("scenario", "age", "year") - - # TODO: use tidyr; BIG PICTURE: NO DATA PREP IN PLOTTING FUNCTIONS! - tot <- reshape2::melt(d[, names_melting_data], id.vars = names_melting_by) - dat <- tot %>% - dplyr::group_by(variable, scenario, year, age) %>% - dplyr::summarise(value = sum(value)) %>% - dplyr::mutate(millions = value / 1e6) %>% - dplyr::arrange(age) +plot_compare_demography <- function(data, fig_number) { + # NOTE: not implementing a lot of checks on inputs + checkmate::assert_tibble(data) + checkmate::assert_count(fig_number, positive = TRUE) # no zeros allowed # TODO: use .data and .vars, import or namespace functions - g <- ggplot(data = dat, aes(x = year, y = millions, fill = age)) + - geom_bar(stat = "identity") + + g <- ggplot( + data = data, + aes(x = year, y = millions, fill = age) + ) + + geom_col() + facet_wrap(~ scenario + variable, ncol = 3) + scale_fill_distiller(palette = "PuOr") + - geom_hline(yintercept = 0, color = 'red') + + geom_hline(yintercept = 0, colour = "red") + labs( x = "calendar year", - y = glue::glue("people (in millions"), + y = "people (in millions", title = glue::glue( "Fig. {fig_number}. Comparison between interpolated population and \\ cohort size ({num_countries} countries)." @@ -95,23 +87,24 @@ plot_compare_demography <- function(d, fig_number) { #' @name plotting #' -#' @param burden +#' @param burden_age A `` with the minimum column names +#' "age", "millions" #' #' @export -plot_age_patterns <- function(burden, fig_number) { - # TODO: REMOVE DATA PREP FROM PLOTTING FNS - d <- burden %>% - group_by(scenario, burden_outcome, age) %>% - summarise(millions = sum(value) / 1e6) +plot_age_patterns <- function(burden_age, fig_number) { + checkmate::assert_tibble(burden_age) + checkmate::assert_count(fig_number, positive = TRUE) # no zeros allowed - g <- ggplot(d, aes(x = age, y = millions)) + - geom_bar(stat = "identity") + + max_age <- max(burden_age$age) + 1 + + g <- ggplot(burden_age, aes(x = age, y = value_millions)) + + geom_col() + facet_grid( burden_outcome ~ scenario, scales = "free_y", labeller = labeller(scenario = label_wrap_gen(10)) ) + - coord_cartesian(xlim = c(0, max(d$age) + 1)) + + coord_cartesian(xlim = c(NA_real_, max_age)) + # no lower limit, expect > 0 labs( y = "people (in millions)", title = glue::glue( @@ -125,29 +118,16 @@ plot_age_patterns <- function(burden, fig_number) { #' @name plotting #' -#' @param year_max +#' @param burden_decades A `` giving the burden by decade, up to +#' `year_max`. This should be the output of [prep_plot_burden_decades()]. #' #' @export -plot_global_burden_decades <- function(burden, year_max, fig_number) { - # TODO: prefer moving these conditional checks elsewhere - # TODO: prefer moving data prep outside plotting fn - stopifnot(year_max %% 10 == 0) - d <- burden %>% - filter(year <= year_max) %>% - mutate(year2 = ifelse(year == year_max, year_max - 1, year)) %>% - mutate(decade = floor(year2 / 10) * 10) %>% - mutate( - decade_label = ifelse( - decade == year_max - 10, - paste0(decade, "-", decade + 10), - paste0(decade, "-", decade + 9) - ) - ) %>% - group_by(scenario, burden_outcome, decade_label) %>% - summarise(millions = sum(value) / 1e6) - - g <- ggplot(d, aes(x = scenario, y = millions, fill = scenario)) + - geom_bar(stat = "identity") + +plot_global_burden_decades <- function(burden_decades, fig_number) { + g <- ggplot( + burden_decades, + aes(x = scenario, y = millions, fill = scenario) + ) + + geom_col() + facet_grid(burden_outcome ~ decade_label, scales = "free_y") + labs( y = "people (in millions)", @@ -169,33 +149,38 @@ plot_global_burden_decades <- function(burden, year_max, fig_number) { #' @name plotting #' -#' @param d -#' -#' @param outcome +#' @param burden_data This is expected to be a `` from a +#' nested-`` constructed using [prep_plot_global_burden()]. #' #' @export -plot_global_burden <- function(d, outcome, fig_number) { - data_ <- dplyr::filter( - d, - .data$burden_outcome == outcome +plot_global_burden <- function(burden_data, outcome_name, fig_number) { + checkmate::assert_tibble(burden_data) + checkmate::assert_subset( + outcome_name, + burden_outcome_names ) + checkmate::assert_count(fig_number) + + scenarios_per_gridcol <- 10 g <- ggplot( - data, - aes(x = year, y = millions, fill = age) + burden_data, + aes(year, millions, fill = age) ) + - geom_bar(stat = "identity", aes(fill = age)) + + geom_col() + facet_grid( ~scenario, scales = "free_y", - labeller = labeller(scenario = label_wrap_gen(10)) # TODO: avoid magic numbers + labeller = labeller( + scenario = label_wrap_gen(scenarios_per_gridcol) + ) ) + scale_fill_distiller(palette = "Spectral") + labs( x = "calendar year", - y = paste(outcomes_list[i], "(in millions)"), # TODO: where is outcomes_list!? + y = glue::glue("{outcome_name} (in millions)"), title = glue::glue( - "Fig. {fig_number}: Global trends of disease burden ({outcome})." + "Fig. {fig_number}: Global trends of disease burden ({outcome_name})." ) ) + theme_vimc(x_text_angle = 90) @@ -205,43 +190,43 @@ plot_global_burden <- function(d, outcome, fig_number) { #' @name plotting #' -#' @param cov +#' @param coverage_set A `` that is the output of +#' [prep_plot_coverage_set()]. #' #' @export -plot_coverage_set <- function(cov, figure_number) { - # TODO: remove data prep - no_vacc <- expand_grid( - year = unique(cov$year), - country = unique(cov$country) - ) %>% - mutate( - coverage = 0, - delivery = "none", - scenario_description = "No vaccination" - ) +plot_coverage_set <- function(coverage_set, figure_number) { + checkmate::assert_tibble(coverage_set) + checkmate::assert_count(figure_number) + + dodge_width <- 0.9 + col_opacity <- 0.7 + scenarios_per_gridcol <- 10 - cov1 <- cov %>% - mutate(delivery = paste(vaccine, activity_type, sep = "-")) %>% - select(scenario_description, delivery, country, year, coverage) %>% - bind_rows(no_vacc) + disease_name <- unique(coverage_set$disease) # expecting a single value - g <- ggplot(cov1, aes(x = year, y = coverage, fill = delivery)) + - geom_bar( - stat = "identity", - position = position_dodge(width = 0.9), - alpha = 0.7 + g <- ggplot( + coverage_set, + aes(x = year, y = coverage, fill = delivery) + ) + + geom_col( + # TODO: check if dodging is really needed + position = position_dodge(dodge_width), + alpha = col_opacity ) + facet_grid( - "country ~ scenario_description", + rows = vars(country), + cols = vars(scenario_description), scales = "free_y", - labeller = labeller(scenario_description = label_wrap_gen(10)) + labeller = labeller( + scenario_description = label_wrap_gen(scenarios_per_gridcol) + ) ) + - scale_x_continuous(breaks = pretty(unique(cov1$year))) + + scale_x_continuous() + labs( x = "calendar year", y = "Proportion of target population", title = glue::glue( - "Fig. {figure_number}: Coverage sets for {cov$disease[1]}" + "Fig. {figure_number}: Coverage sets for {disease_name}" ) ) + theme_vimc(90) @@ -251,63 +236,41 @@ plot_coverage_set <- function(cov, figure_number) { #' @name plotting #' -#' @param fvp -#' -#' @param year_min -#' -#' @param year_max +#' @param fvp_data A `` of estimates of fully-vaccinated persons (FVPs) per +#' scenario, with scenarios as factors in order of the number of adjusted-FVPs. +#' Expected to be the output of [prep_plot_fvp()]. #' #' @export -plot_fvp <- function(fvp, year_min, year_max, figure_number) { - # TODO: PREFER TO REMOVE DATA PREP CODE - no_vacc <- expand_grid( - year = unique(fvp$year), - country = unique(fvp$country) - ) %>% - mutate(fvps_adjusted = 0, scenario_description = "No vaccination") - - fvp_final <- bind_rows(fvp, no_vacc) %>% - filter(year >= year_min & year <= year_max) %>% - mutate(scenario = as.factor(scenario_description)) +plot_fvp <- function(fvp_data, figure_number) { + checkmate::assert_tibble(fvp_data) + checkmate::assert_count(figure_number) - fvp_final$scenario <- relevel(fvp_final$scenario, "No vaccination") - fvp_final$scenario <- gsub(tolower(fvp$disease[1L]), "", fvp_final$scenario) - fvp_final$scenario <- gsub("-", " ", fvp_final$scenario) + scenarios_per_gridcol <- 10 + scale_millions <- 1e-6 + disease_name <- unique(fvp_data$disease) - scenario_order <- c(names(sort(tapply( - fvp_final$fvps_adjusted, - fvp_final$scenario, - sum, - na.rm = TRUE - )))) - - fvp_final$scenario <- forcats::fct_relevel(fvp_final$scenario, scenario_order) - - fvp_agg <- - fvp_final %>% - dplyr::group_by(year, scenario, disease) %>% - dplyr::summarise(fvp = sum(fvps_adjusted, na.rm = TRUE)) - - # TODO: prefer to use a scale transform rather than touching data - g <- ggplot(fvp_agg, aes(x = year, y = fvp / 1e6, fill = scenario)) + - geom_bar( - stat = "identity", - colour = "midnightblue", + g <- ggplot( + fvp_data, + aes(x = year, y = fvp, fill = scenario) + ) + + geom_col( fill = "midnightblue" ) + facet_grid( ~scenario, scales = "free_y", - labeller = labeller(scenario = label_wrap_gen(10)) + labeller = labeller(scenario = label_wrap_gen(scenarios_per_gridcol)) + ) + + scale_x_continuous() + + scale_y_continuous( + labels = scales::label_number(scale = scale_millions) ) + - scale_x_continuous(breaks = pretty(unique(fvp_agg$year))) + - ylab(paste("FVP (in millions)")) + labs( x = "calendar year", y = "FVP (in millions)", title = glue::glue( "Fig. {figure_number}: Fully Vaccinated Persons at global level by \\ - scenario for {fvp$disease[1L]}" + scenario for {disease_name}" ) ) + theme_vimc(90, legend.position = "none") diff --git a/R/plotting_prep.R b/R/plotting_prep.R new file mode 100644 index 0000000..37c3780 --- /dev/null +++ b/R/plotting_prep.R @@ -0,0 +1,247 @@ +#' Prepare data for plotting +#' +#' @name plotting_prep +#' @rdname plotting_prep +#' +#' @description +#' Convert the output of [check_demography_alignment()] to a long-format tibble. +#' +#' @param data For `prep_plot_demography()`, a `` output from +#' [check_demography_alignment()]. +#' +#' @param burden For `prep_plot_age()`, +#' +#' @return +#' +#' - For `prep_plot_demography()`: a `` in long-format, with the +#' identifier-columns, "scenario", "age", and "year", with the added column +#' "value_millions". +#' +#' - For `prep_plot_age()`: +#' +#' @export +prep_plot_demography <- function(data) { + checkmate::assert_tibble(data) + + # NOTE: this data is expected to come from `check_demography_alignment()` + # there are expected to be more colnames: abs_diff, prop_diff + names_melting_data <- c( + "scenario", + "age", + "year", + "expected", + "provided", + "difference" + ) + + checkmate::assert_names( + colnames(data), + must.include = names_melting_data + ) + + num_countries <- length(unique(data$country)) + + names_melting_by <- c("scenario", "age", "year") + + data <- dplyr::select( + data, + {{ names_melting_data }} + ) + + data <- tidyr::pivot_longer( + data, + !{{ names_melting_by }} + ) + + data <- dplyr::summarise( + data, + value = sum(.data$value), + .groups = c("variable", "scenario", "year", "age") + ) + data <- dplyr::mutate( + data, + value_millions = .data$value / 1e6 + ) + data <- dplyr::arrange( + data, + "age" + ) + + data +} + +#' @name plotting_prep +#' +#' @export +prep_plot_age <- function(burden) { + checkmate::assert_tibble(burden) + + data <- dplyr::summarise( + burden, + value_millions = sum(.data$value) / 1e6, + .groups = c("scenario", "burden_outcome", "age") + ) + + data +} + +#' @name plotting_prep +#' +#' @export +prep_plot_burden_decades <- function(burden, year_max) { + # TODO: add colnames check + # TODO: general: make validator for burden data + checkmate::assert_tibble(burden) + + is_decade <- year_max %% 10 == 0 + if (!is_decade) { + cli::cli_abort( + "Expected {.code year_max} to be a multiple of 10, but got value \\ + {.code year_max}." + ) + } + + last_decade <- year_max - 10 + + data <- data[data$year <= year_max, ] + data <- dplyr::mutate( + data, + year = pmin( + .data$year, + .data$year_max - 1 + ), + decade = floor(.data$year / 10) * 10, + decade_label = dplyr::if_else( + .data$decade == last_decade, + glue::glue("{.data$decade}-{.data$decade + 10}"), + glue::glue("{.data$decade}-{.data$decade + 9}") + ) + ) + + data <- dplyr::summarise( + data, + value_millions = sum(.data$value) / 1e6, + .groups = c("scenario", "burden_outcome", "decade_label") + ) + + data +} + +#' @name plotting_prep +#' +#' @export +prep_plot_global_burden <- function(burden) { + # TODO: add colnames check + checkmate::assert_tibble(burden) + + nesting_cols <- "outcome" + + # create a nested tibble with a list column named "burden_data" + burden_nested <- tidyr::nest( + burden, + .by = {{ nesting_cols }}, + .key = burden_data + ) + + burden_nested +} + +#' @name plotting_prep +#' +#' @export +prep_plot_coverage_set <- function(coverage) { + checkmate::assert_tibble(coverage) + + years <- unique(coverage$year) + countries <- unique(coverage$country) + coverage_level <- 0 + delivery <- "none" + scenario_description <- "No vaccination" + + no_vax <- tidyr::crossing( + year = years, + country = countries, + coverage = coverage_level, + delivery = delivery, + scenario_description = scenario_description + ) + + coverage_set <- dplyr::mutate( + coverage, + delivery = glue::glue("{.data$vaccine}-{.data$activity_type}") + ) + cols_to_select <- c( + "scenario_description", + "delivery", + "country", + "year", + "coverage" + ) + coverage_set <- dplyr::select( + coverage_set, + {{ cols_to_select }} + ) + coverage_set <- dplyr::bind_rows( + coverage_set, + no_vax + ) + + coverage_set +} + +#' @name plotting_prep +#' +#' @export +prep_plot_fvp <- function(fvp, year_min, year_max) { + checkmate::assert_tibble(fvp) + checkmate::assert_count(year_min) + checkmate::assert_count(year_max) + + years <- unique(coverage$year) + countries <- unique(coverage$country) + fvps_adjusted <- 0 + scenario_description <- "No vaccination" + + no_vax <- tidyr::crossing( + year = years, + country = countries, + fvps_adjusted = fvps_adjusted, + scenario_description = scenario_description + ) + + fvp_final <- dplyr::bind_rows(fvp, no_vax) + fvp_final <- dplyr::filter( + fvp_final, + dplyr::between(.data$year, year_min, year_max) + ) + fvp_final <- dplyr::mutate( + fvp_final, + scenario = forcats::fct_relevel( + .data$scenario_description, + "No vaccination" + ) # convert characters to factors and set first level + ) + + # TODO: need to see an example to figure this out + fvp_final$scenario <- gsub(tolower(fvp$disease[1L]), "", fvp_final$scenario) + fvp_final$scenario <- gsub("-", " ", fvp_final$scenario) + + # determine scenario order in terms of total adjusted FVPs per scenario + scenario_order <- names(sort( + tapply( + fvp_final$fvps_adjusted, + fvp_final$scenario, + sum, + na.rm = TRUE + ) + )) + + fvp_final$scenario <- forcats::fct_relevel(fvp_final$scenario, scenario_order) + + fvp_agg <- dplyr::summarise( + fvp = sum(.data$fvps_adjusted, na.rm = TRUE), + .groups = c("year", "scenario", "disease") + ) + + fvp_agg +}