Wrapper function to add classifications to dataframe
wlangera opened this issue · 0 comments
wlangera commented
Hi
For users who want more flexibility (create their own plots etc.), it might be nice to add effect classifications with their descriptions to a dataframe as ordered factor variables.
If you are interested I can make a pull request (not sure if I am happy with the function name, see roxygen at the end of this issue).
# Load packages
library(rlang)
library(dplyr)
# Function to add effects
add_classification_as_factor <- function(
df,
cl_columns,
threshold,
reference = 0,
coarse = TRUE) {
# Classify effects with effectclass
classified_df <- df %>%
mutate(effect_code = effectclass::classification(
lcl = !!sym(cl_columns[1]),
ucl = !!sym(cl_columns[2]),
threshold = threshold,
reference = reference)
)
# Add coarse classification if specified
if (coarse) {
classified_df$effect_code_coarse <- effectclass::coarse_classification(
classified_df$effect_code)
}
# Create ordered factors of effects
out_df <- classified_df %>%
mutate(
effect_code = factor(.data$effect_code,
levels = c(
"++",
"+",
"+~",
"~",
"-~",
"-",
"--",
"?+",
"?-",
"?"),
ordered = TRUE),
effect = case_when(
effect_code == "++" ~ "strong increase",
effect_code == "+" ~ "increase",
effect_code == "+~" ~ "moderate increase",
effect_code == "~" ~ "stable",
effect_code == "-~" ~ "moderate decrease",
effect_code == "-" ~ "decrease",
effect_code == "--" ~ "strong decrease",
effect_code == "?+" ~ "potential increase",
effect_code == "?-" ~ "potential decrease",
effect_code == "?" ~ "unknown"
),
effect = factor(.data$effect,
levels = c(
"strong increase",
"increase",
"moderate increase",
"stable",
"moderate decrease",
"decrease",
"strong decrease",
"potential increase",
"potential decrease",
"unknown"),
ordered = TRUE)
)
if (coarse) {
out_df <- out_df %>%
mutate(
effect_code_coarse = factor(.data$effect_code_coarse,
levels = c(
"+",
"~",
"-",
"?"),
ordered = TRUE),
effect_coarse = case_when(
effect_code_coarse == "+" ~ "increase",
effect_code_coarse == "-" ~ "decrease",
effect_code_coarse == "~" ~ "stable",
effect_code_coarse == "?" ~ "unknown"
),
effect_coarse = factor(.data$effect_coarse,
levels = c(
"increase",
"stable",
"decrease",
"unknown"),
ordered = TRUE)
)
}
return(out_df)
}
# Example dataframe
data_estimates <- data.frame(
year = seq(2011, 2016),
estimate = c(0.535, 0.069, 0.328, 0.358, 0.189, 0.551),
ll = c(0.315, 0.032, 0.095, 0.086, 0.019, 0.311),
ul = c(0.697, 0.536, 0.549, 0.763, 0.411, 0.681)
)
# Add effects with function
data_estimates_effects <- add_classification_as_factor(
df = data_estimates,
cl_columns = c("ll", "ul"),
threshold = 0.1,
reference = 0.6,
coarse = TRUE)
# How does the dataframe look like?
glimpse(data_estimates_effects)
#> Rows: 6
#> Columns: 8
#> $ year <int> 2011, 2012, 2013, 2014, 2015, 2016
#> $ estimate <dbl> 0.535, 0.069, 0.328, 0.358, 0.189, 0.551
#> $ ll <dbl> 0.315, 0.032, 0.095, 0.086, 0.019, 0.311
#> $ ul <dbl> 0.697, 0.536, 0.549, 0.763, 0.411, 0.681
#> $ effect_code <ord> ?-, -, -, ?, --, ?-
#> $ effect_code_coarse <ord> ?, -, -, ?, -, ?
#> $ effect <ord> potential decrease, decrease, decrease, unknown, st…
#> $ effect_coarse <ord> unknown, decrease, decrease, unknown, decrease, unk…
# The effects are ordered factors
data_estimates_effects$effect_code
#> [1] ?- - - ? -- ?-
#> Levels: ++ < + < +~ < ~ < -~ < - < -- < ?+ < ?- < ?
data_estimates_effects$effect
#> [1] potential decrease decrease decrease unknown
#> [5] strong decrease potential decrease
#> 10 Levels: strong increase < increase < moderate increase < ... < unknown
Created on 2024-07-08 with reprex v2.1.0
Here is the full function with roxygen documentation:
#' Add effect classifications to a dataframe by comparing the confidence
#' intervals with a reference and thresholds
#'
#' This function adds classified effects to a dataframe as ordered factor
#' variables by comparing the confidence intervals with a reference and
#' thresholds. A wrapper around `effectclass::classify()` and
#' `effectclass::coarse_classification()`.
#'
#' @param df A dataframe containing summary data of confidence limits.
#' @param cl_columns A vector of 2 column names in `df` indicating respectively
#' the lower and upper confidence limits.
#' @param threshold A vector of either 1 or 2 thresholds. A single threshold
#' will be transformed into `reference + c(-abs(threshold), abs(threshold))`.
#' See `effectclass::classify()`.
#' @param reference The null hypothesis. Defaults to 0.
#' See `effectclass::classify()`.
#' @param coarse Logical, defaults to `TRUE`. If `TRUE`, add a coarse
#' classification to the dataframe.
#' See `effectclass::coarse_classification()`.
#'
#' @returns The returned value is the original dataframe with added columns
#' `effect_code` and `effect` containing respectively the effect symbols and
#' descriptions as ordered factor variables. In case or `coarse = TRUE` (by
#' default) also `effect_code_coarse` and `effect_coarse` containing the coarse
#' classification effects.
add_classification_as_factor <- function(
df,
cl_columns,
threshold,
reference = 0,
coarse = TRUE) {
require("dplyr")
require("rlang")
# Classify effects with effectclass
classified_df <- df %>%
mutate(effect_code = effectclass::classification(
lcl = !!sym(cl_columns[1]),
ucl = !!sym(cl_columns[2]),
threshold = threshold,
reference = reference)
)
# Add coarse classification if specified
if (coarse) {
classified_df$effect_code_coarse <- effectclass::coarse_classification(
classified_df$effect_code)
}
# Create ordered factors of effects
out_df <- classified_df %>%
mutate(
effect_code = factor(.data$effect_code,
levels = c(
"++",
"+",
"+~",
"~",
"-~",
"-",
"--",
"?+",
"?-",
"?"),
ordered = TRUE),
effect = case_when(
effect_code == "++" ~ "strong increase",
effect_code == "+" ~ "increase",
effect_code == "+~" ~ "moderate increase",
effect_code == "~" ~ "stable",
effect_code == "-~" ~ "moderate decrease",
effect_code == "-" ~ "decrease",
effect_code == "--" ~ "strong decrease",
effect_code == "?+" ~ "potential increase",
effect_code == "?-" ~ "potential decrease",
effect_code == "?" ~ "unknown"
),
effect = factor(.data$effect,
levels = c(
"strong increase",
"increase",
"moderate increase",
"stable",
"moderate decrease",
"decrease",
"strong decrease",
"potential increase",
"potential decrease",
"unknown"),
ordered = TRUE)
)
if (coarse) {
out_df <- out_df %>%
mutate(
effect_code_coarse = factor(.data$effect_code_coarse,
levels = c(
"+",
"~",
"-",
"?"),
ordered = TRUE),
effect_coarse = case_when(
effect_code_coarse == "+" ~ "increase",
effect_code_coarse == "-" ~ "decrease",
effect_code_coarse == "~" ~ "stable",
effect_code_coarse == "?" ~ "unknown"
),
effect_coarse = factor(.data$effect_coarse,
levels = c(
"increase",
"stable",
"decrease",
"unknown"),
ordered = TRUE)
)
}
return(out_df)
}