AegisFunc
AegisFunc is a tool for Spatio-temporal epidemiology based on OHDSI CDM
v5.4.0.
AegisFunc originated from the Aegis project that is an open-source
spatial analysis tool based on CDM. https://github.com/ABMI/Aegis.
Requirements
OHDSI CDM v5.4.0
- We use latitude and longitude fields in Location table (updated on v5.4.0).
INLA package
- INLA package is not supported by CRAN.
- You can install manually from the r-inla repository
install.packages("INLA", repos = c(getOption("repos"), INLA = "https://inla.r-inla-download.org/R/stable"), dep = TRUE)
R v4.2
- Latest version of INLA requires R v4.2 or above
Installation
You can install the development version of AegisFunc like so:
devtools::install()
Load
Load AegisFunc package before use.
library(AegisFunc)
Check your CDM database
Database connection
- Connection string of SQL Server: “jdbc:sqlserver://localhost:1433;databaseName=cdm;user=user;password=password”
- Connection string of PostgreSQL: “jdbc:postgresql://localhost:5432/cdm?user=user&password=password”
We have tested with SQL Server 2019.
dbms <- "sql server"
path_to_driver <- getwd()
connection_string <- "jdbc:sqlserver://[SERVER_IP]:[SERVER_PORT];user=[USER_ID];password=[USER_PW];databaseName=[CDM_DB_NAME]"
conn_info <- get_connection_details(
dbms = dbms,
path_to_driver = path_to_driver,
connection_string = connection_string
)
Check CDM version
You must confirm your CDM version.
Because we only support for CDM v5.4.0 or above.
conn_info <- conn_info
cdm_database_schema <- "[CDM_DB_SCHEMA]"
cdm_source <- get_cdm_source(
conn_info = conn_info,
cdm_database_schema = cdm_database_schema
)
cdm_version <- cdm_source[c("cdm_version")]
Load cohort list
Query defined cohort list.
conn_info <- conn_info
result_database_schema <- "[RESULT_DB_SCHEMA]"
cohort_list <- get_cohort_list_table(
conn_info = conn_info,
result_database_schema = result_database_schema
)
Forecasting (temporal model)
Step 01. Prepare Data
Get cohort table from CDM/Atlas database
model <- "temporal"
conn_info <- conn_info
cdm_database_schema <- "[CDM_DB_SCHEMA]"
result_database_schema <- "[RESULT_DB_SCHEMA]"
target_cohort_definition_id <- "1"
outcome_cohort_definition_id <- "2"
cohort_table <- get_cohort_analysis_table(
model = model,
conn_info = conn_info,
cdm_database_schema = cdm_database_schema,
result_database_schema = result_database_schema,
target_cohort_definition_id = target_cohort_definition_id,
outcome_cohort_definition_id = outcome_cohort_definition_id
)
Step 02. Calculate forecasting
Calculate forecasting
model <- "temporal"
table <- cohort_table
observation_end_date <- "2008-01-01"
prediction_end_date <- "2009-08-01"
variables_type = "day,season,month,week"
deriv <- calculate_forecasting(
model = model,
table = table,
observation_end_date = observation_end_date,
prediction_end_date = prediction_end_date,
variables_type = variables_type
)
Step 03. Plot
Plot with data
data <- deriv
plot <- get_forecasting_plot(
data = data
)
plot
Disease Map/Cluster (spatial model)
Step 01. Prepare Data
Get cohort table from CDM/Atlas database
model <- "spatial"
conn_info <- conn_info
cdm_database_schema <- "[CDM_DB_SCHEMA]"
result_database_schema <- "[RESULT_DB_SCHEMA]"
target_cohort_definition_id <- "1"
outcome_cohort_definition_id <- "2"
cohort_start_date <- "2020-01-01"
cohort_end_date <- "2020-12-31"
time_at_risk_start_date <- "0"
time_at_risk_end_date <- "0"
time_at_risk_end_date_panel <- "cohort_start_date" # "cohort_start_date" or "cohort_end_date"
cohort_table <- get_cohort_analysis_table(
model = model,
conn_info = conn_info,
cdm_database_schema = cdm_database_schema,
result_database_schema = result_database_schema,
target_cohort_definition_id = target_cohort_definition_id,
outcome_cohort_definition_id = outcome_cohort_definition_id,
cohort_start_date = cohort_start_date,
cohort_end_date = cohort_end_date,
time_at_risk_start_date = time_at_risk_start_date,
time_at_risk_end_date = time_at_risk_end_date,
time_at_risk_end_date_panel = time_at_risk_end_date_panel
)
Read geo data
name <- "KOR" # "GADM" or "KOR"
country <- "KOR"
level <- "2"
geo <- get_geo_data(
name = name,
country = country,
level = level
)
Map cohort table (lat/long) with geo data
latlong <- cohort_table
geo <- geo
geo_map <- map_latlong_geo(
latlong = latlong,
geo = geo
)
Arrange table
model <- "spatial"
table <- geo_map
table_arr <- calculate_count_with_geo_oid(
model = model,
table = table
)
Step 02. Adjustment
Adjustment for age and sex
model <- "spatial"
table <- table_arr
mode <- "std" # "std" or "crd"
fraction <- "100000"
conf_level <- "0.95"
table_adj <- calculate_adjust_age_sex_indirectly(
model = model,
table = table,
mode = mode,
fraction = fraction,
conf_level = conf_level
)
Step 03-1. Calculate disease map
Generate graph file from geo data
geo <- geo
graph_file_path <- trans_geo_to_graph(
geo = geo
)
Calculate disease map
model <- "spatial"
table <- table_adj
graph_file_path <- graph_file_path
deriv <- calculate_disease_map(
model = model,
table = table,
graph_file_path = graph_file_path
)
Step 03-2. Calculate disease cluster
Calculate disease cluster
model <- "spatial"
table <- table_adj
deriv <- calculate_disease_cluster(
model = model,
table = table
)
Step 04. Plot
Merge geo data with derivatives
geo <- geo
deriv_arr <- deriv$arranged_table
data <- merge_geo_with_deriv(
geo = geo,
deriv = deriv_arr
)
Plot with data
data <- data
stats <- deriv$stats
color_type <- "colorQuantile"
color_param <- base::list(
palette = "Reds",
domain = NULL,
bins = 7,
pretty = TRUE,
n = 4,
levels = NULL,
ordered = FALSE,
na.color = "#FFFFFF",
alpha = FALSE,
reverse = FALSE,
right = FALSE
)
plot <- get_leaflet_map(
data = data,
stats = stats,
color_type = color_type,
color_param = color_param
)
plot
Disease Map/Cluster (spatio-temporal model)
Step 01. Prepare Data
Get cohort table from CDM/Atlas database
model <- "spatio-temporal"
conn_info <- conn_info
cdm_database_schema <- "[CDM_DB_SCHEMA]"
result_database_schema <- "[RESULT_DB_SCHEMA]"
target_cohort_definition_id <- "1"
outcome_cohort_definition_id <- "2"
cohort_start_date <- "2020-01-01"
cohort_end_date <- "2020-12-31"
time_at_risk_start_date <- "0"
time_at_risk_end_date <- "0"
time_at_risk_end_date_panel <- "cohort_start_date" # "cohort_start_date" or "cohort_end_date"
cohort_table <- get_cohort_analysis_table(
model = model,
conn_info = conn_info,
cdm_database_schema = cdm_database_schema,
result_database_schema = result_database_schema,
target_cohort_definition_id = target_cohort_definition_id,
outcome_cohort_definition_id = outcome_cohort_definition_id,
cohort_start_date = cohort_start_date,
cohort_end_date = cohort_end_date,
time_at_risk_start_date = time_at_risk_start_date,
time_at_risk_end_date = time_at_risk_end_date,
time_at_risk_end_date_panel = time_at_risk_end_date_panel
)
Read geo data
name <- "KOR" # "GADM" or "KOR"
country <- "KOR"
level <- "2"
geo <- get_geo_data(
name = name,
country = country,
level = level
)
Map cohort table (lat/long) with geo data
latlong <- cohort_table
geo <- geo
geo_map <- map_latlong_geo(
latlong = latlong,
geo = geo
)
Arrange table
model <- "spatio-temporal"
table <- geo_map
table_arr <- calculate_count_with_geo_oid(
model = model,
table = table
)
Step 02. Adjustment
Adjustment for age and sex
model <- "spatio-temporal"
table <- table_arr
mode <- "std" # "std" or "crd"
fraction <- "100000"
conf_level <- "0.95"
table_adj <- calculate_adjust_age_sex_indirectly(
model = model,
table = table,
mode = mode,
fraction = fraction,
conf_level = conf_level
)
Step 03-1. Calculate disease map
Generate graph file from geo data
geo <- geo
graph_file_path <- trans_geo_to_graph(
geo = geo
)
Calculate disease map
model <- "spatio-temporal"
table <- table_adj
graph_file_path <- graph_file_path
deriv <- calculate_disease_map(
model = model,
table = table,
graph_file_path = graph_file_path
)
Step 03-2. Calculate disease cluster
Calculate disease cluster
model <- "spatio-temporal"
table <- table_adj
deriv <- calculate_disease_cluster(
model = model,
table = table
)
Step 04. Plot
Check names of derivatives
years <- names(deriv)
Merge geo data with derivatives
data <- base::list()
for(i in 1:length(years)) {
geo <- geo
deriv_arr <- deriv[years[i]][[1]]$arranged_table
data <- append(data, list(merge_geo_with_deriv(
geo = geo,
deriv = deriv_arr
)))
}
names(data) <- years
Make plots
plot <- base::list()
for(i in 1:length(years)) {
data <- data[years[i]][[1]]
stats <- deriv[years[i]][[1]]$stats
color_type <- "colorQuantile"
color_param <- base::list(
palette = "Reds",
domain = NULL,
bins = 7,
pretty = TRUE,
n = 9,
levels = NULL,
ordered = FALSE,
na.color = "#FFFFFF",
alpha = FALSE,
reverse = FALSE,
right = FALSE
)
plot <- append(plot, list(get_leaflet_map(
data = data,
stats = stats,
color_type = color_type,
color_param = color_param
)))
}
names(plot) <- years
Check names of derivatives
idx <- names(plot)
Plot with index
plot[idx[1]][[1]]