Wrapper function to add classifications to dataframe

Question

Wrapper function to add classifications to dataframe

wlangera opened this issue 4 months ago · 0 comments

Hi

For users who want more flexibility (create their own plots etc.), it might be nice to add effect classifications with their descriptions to a dataframe as ordered factor variables.
If you are interested I can make a pull request (not sure if I am happy with the function name, see roxygen at the end of this issue).

# Load packages
library(rlang)
library(dplyr)

# Function to add effects
add_classification_as_factor <- function(
  df,
  cl_columns,
  threshold,
  reference = 0,
  coarse = TRUE) {
  
  # Classify effects with effectclass
  classified_df <- df %>%
    mutate(effect_code = effectclass::classification(
      lcl = !!sym(cl_columns[1]),
      ucl = !!sym(cl_columns[2]),
      threshold = threshold,
      reference = reference)
    )
  
  # Add coarse classification if specified
  if (coarse) {
    classified_df$effect_code_coarse <- effectclass::coarse_classification(
      classified_df$effect_code)
  }
  
  # Create ordered factors of effects
  out_df <- classified_df %>%
    mutate(
      effect_code = factor(.data$effect_code,
                           levels = c(
                             "++",
                             "+",
                             "+~",
                             "~",
                             "-~",
                             "-",
                             "--",
                             "?+",
                             "?-",
                             "?"),
                           ordered = TRUE),
      effect = case_when(
        effect_code == "++" ~ "strong increase",
        effect_code == "+"  ~ "increase",
        effect_code == "+~" ~ "moderate increase",
        effect_code == "~"  ~ "stable",
        effect_code == "-~" ~ "moderate decrease",
        effect_code == "-"  ~ "decrease",
        effect_code == "--" ~ "strong decrease",
        effect_code == "?+" ~ "potential increase",
        effect_code == "?-" ~ "potential decrease",
        effect_code == "?"  ~ "unknown"
      ),
      effect = factor(.data$effect,
                      levels = c(
                        "strong increase",
                        "increase",
                        "moderate increase",
                        "stable",
                        "moderate decrease",
                        "decrease",
                        "strong decrease",
                        "potential increase",
                        "potential decrease",
                        "unknown"),
                      ordered = TRUE)
    )
  
  if (coarse) {
    out_df <- out_df %>%
      mutate(
        effect_code_coarse = factor(.data$effect_code_coarse,
                                    levels = c(
                                      "+",
                                      "~",
                                      "-",
                                      "?"),
                                    ordered = TRUE),
        effect_coarse = case_when(
          effect_code_coarse == "+" ~ "increase",
          effect_code_coarse == "-" ~ "decrease",
          effect_code_coarse == "~" ~ "stable",
          effect_code_coarse == "?" ~ "unknown"
        ),
        effect_coarse = factor(.data$effect_coarse,
                               levels = c(
                                 "increase",
                                 "stable",
                                 "decrease",
                                 "unknown"),
                               ordered = TRUE)
      )
  }
  
  return(out_df)
}

# Example dataframe
data_estimates <- data.frame(
  year = seq(2011, 2016),
  estimate = c(0.535, 0.069, 0.328, 0.358, 0.189, 0.551),
  ll = c(0.315, 0.032, 0.095, 0.086, 0.019, 0.311),
  ul = c(0.697, 0.536, 0.549, 0.763, 0.411, 0.681)
)

# Add effects with function
data_estimates_effects <- add_classification_as_factor(
  df = data_estimates,
  cl_columns = c("ll", "ul"),
  threshold = 0.1,
  reference = 0.6,
  coarse = TRUE)

# How does the dataframe look like?
glimpse(data_estimates_effects)
#> Rows: 6
#> Columns: 8
#> $ year               <int> 2011, 2012, 2013, 2014, 2015, 2016
#> $ estimate           <dbl> 0.535, 0.069, 0.328, 0.358, 0.189, 0.551
#> $ ll                 <dbl> 0.315, 0.032, 0.095, 0.086, 0.019, 0.311
#> $ ul                 <dbl> 0.697, 0.536, 0.549, 0.763, 0.411, 0.681
#> $ effect_code        <ord> ?-, -, -, ?, --, ?-
#> $ effect_code_coarse <ord> ?, -, -, ?, -, ?
#> $ effect             <ord> potential decrease, decrease, decrease, unknown, st…
#> $ effect_coarse      <ord> unknown, decrease, decrease, unknown, decrease, unk…

# The effects are ordered factors
data_estimates_effects$effect_code
#> [1] ?- -  -  ?  -- ?-
#> Levels: ++ < + < +~ < ~ < -~ < - < -- < ?+ < ?- < ?

data_estimates_effects$effect
#> [1] potential decrease decrease           decrease           unknown           
#> [5] strong decrease    potential decrease
#> 10 Levels: strong increase < increase < moderate increase < ... < unknown

^{Created on 2024-07-08 with reprex v2.1.0}

Here is the full function with roxygen documentation:

#' Add effect classifications to a dataframe by comparing the confidence
#' intervals with a reference and thresholds
#'
#' This function adds classified effects to a dataframe as ordered factor
#' variables by comparing the confidence intervals with a reference and
#' thresholds. A wrapper around `effectclass::classify()` and
#' `effectclass::coarse_classification()`.
#'
#' @param df A dataframe containing summary data of confidence limits.
#' @param cl_columns A vector of 2 column names in `df` indicating respectively
#' the lower and upper confidence limits.
#' @param threshold A vector of either 1 or 2 thresholds. A single threshold
#' will be transformed into `reference + c(-abs(threshold), abs(threshold))`.
#' See `effectclass::classify()`.
#' @param reference The null hypothesis. Defaults to 0.
#' See `effectclass::classify()`.
#' @param coarse Logical, defaults to `TRUE`. If `TRUE`, add a coarse
#' classification to the dataframe.
#' See `effectclass::coarse_classification()`.
#'
#' @returns The returned value is the original dataframe with added columns
#' `effect_code` and `effect` containing respectively the effect symbols and
#' descriptions as ordered factor variables. In case or `coarse = TRUE` (by
#' default) also `effect_code_coarse` and `effect_coarse` containing the coarse
#' classification effects.

add_classification_as_factor <- function(
    df,
    cl_columns,
    threshold,
    reference = 0,
    coarse = TRUE) {
  require("dplyr")
  require("rlang")

  # Classify effects with effectclass
  classified_df <- df %>%
    mutate(effect_code = effectclass::classification(
      lcl = !!sym(cl_columns[1]),
      ucl = !!sym(cl_columns[2]),
      threshold = threshold,
      reference = reference)
    )

  # Add coarse classification if specified
  if (coarse) {
    classified_df$effect_code_coarse <- effectclass::coarse_classification(
      classified_df$effect_code)
  }

  # Create ordered factors of effects
  out_df <- classified_df %>%
    mutate(
      effect_code = factor(.data$effect_code,
                      levels = c(
                        "++",
                        "+",
                        "+~",
                        "~",
                        "-~",
                        "-",
                        "--",
                        "?+",
                        "?-",
                        "?"),
                      ordered = TRUE),
      effect = case_when(
        effect_code == "++" ~ "strong increase",
        effect_code == "+"  ~ "increase",
        effect_code == "+~" ~ "moderate increase",
        effect_code == "~"  ~ "stable",
        effect_code == "-~" ~ "moderate decrease",
        effect_code == "-"  ~ "decrease",
        effect_code == "--" ~ "strong decrease",
        effect_code == "?+" ~ "potential increase",
        effect_code == "?-" ~ "potential decrease",
        effect_code == "?"  ~ "unknown"
      ),
      effect = factor(.data$effect,
                      levels = c(
                        "strong increase",
                        "increase",
                        "moderate increase",
                        "stable",
                        "moderate decrease",
                        "decrease",
                        "strong decrease",
                        "potential increase",
                        "potential decrease",
                        "unknown"),
                      ordered = TRUE)
    )

    if (coarse) {
      out_df <- out_df %>%
        mutate(
          effect_code_coarse = factor(.data$effect_code_coarse,
                               levels = c(
                                 "+",
                                 "~",
                                 "-",
                                 "?"),
                               ordered = TRUE),
          effect_coarse = case_when(
            effect_code_coarse == "+" ~ "increase",
            effect_code_coarse == "-" ~ "decrease",
            effect_code_coarse == "~" ~ "stable",
            effect_code_coarse == "?" ~ "unknown"
          ),
          effect_coarse = factor(.data$effect_coarse,
                                 levels = c(
                                   "increase",
                                   "stable",
                                   "decrease",
                                   "unknown"),
                                 ordered = TRUE)
        )
    }

  return(out_df)
}