MethReg
can be used to generate testable hypothesis on the synergistic
interaction of DMRs and TFs in gene regulation.
MethReg
can be used either to evaluate regulatory potentials of
candidate regions or to search for methylation coupled TF regulatory
processes in the entire genome.
You can install the MethReg from Bioconductor with:
BiocManager::install("MethReg")
This is a basic example which shows you how to use the package:
library(MethReg)
#---------------------------------------
# Data input
#---------------------------------------
# 1) Gene expression matrix
# 2) DNA methylation
# With same column names
data("dna.met.chr21")
data("gene.exp.chr21.log2")
all(colnames(dna.met.chr21) == colnames(gene.exp.chr21.log2))
#> [1] TRUE
# Since we are working with regions we need to map our 450k array to regions
dna.met.chr21 <- make_dnam_se(dna.met.chr21)
#---------------------------------------
# Mapping regions
#---------------------------------------
# For each region get target gene and predicted TF biding to the regions
# get_triplet incorporates two other functions:
# 1) get_region_target_gene
# 2) get_tf_in_region
triplet <- create_triplet_distance_based(
region = rownames(dna.met.chr21),
motif.search.window.size = 50,
motif.search.p.cutoff = 10^-3,
target.method = "genes.promoter.overlap",
genome = "hg19",
cores = 1
)
#> Finding target genes
#> Mapping regions to the closest gene
#> Looking for TFBS
#>
#>
#> Attaching package: 'S4Vectors'
#> The following object is masked from 'package:base':
#>
#> expand.grid
#>
#> Attaching package: 'Biostrings'
#> The following object is masked from 'package:base':
#>
#> strsplit
#> Joining, by = "regionID"
#---------------------------------------
# Evaluate two models:
#---------------------------------------
# 1) target gene ~ TF + DNAm + TF * DNAm
# 2) target gene ~ TF + DNAm_group + TF * DNAm_group
# where DNAm_group is a binary indicator if the sample belongs to: Q4 or Q1
results <- interaction_model(
triplet = triplet,
dnam = dna.met.chr21,
exp = gene.exp.chr21.log2
)
head(results)
#> regionID target_gene_name target
#> 1 chr21:30372219-30372220 RPL23P2 ENSG00000176054
#> 2 chr21:30430511-30430512 AF129075.5 ENSG00000231125
#> 3 chr21:33109780-33109781 AP000255.6 ENSG00000273091
#> 4 chr21:40692859-40692860 BRWD1 ENSG00000185658
#> 5 chr21:43982646-43982647 AP001625.6 ENSG00000235772
#> 6 chr21:43983587-43983588 AP001625.6 ENSG00000235772
#> TF_external_gene_name TF TF_symbol target_symbol met.IQR
#> 1 ETS2 ENSG00000157557 ETS2 RPL23P2 0.182568764
#> 2 BACH1 ENSG00000156273 BACH1 AF129075.2 0.310379208
#> 3 GABPA ENSG00000154727 GABPA AP000255.1 0.080160919
#> 4 GABPA ENSG00000154727 GABPA BRWD1 0.040638333
#> 5 GABPA ENSG00000154727 GABPA AP001625.2 0.008670064
#> 6 GABPA ENSG00000154727 GABPA AP001625.2 0.014175786
#> quant_pval_metGrp quant_fdr_metGrp quant_pval_rna.tf quant_fdr_rna.tf
#> 1 3.953828e-05 0.0001186148 5.958024e-03 1.787407e-02
#> 2 7.437666e-01 0.7437665914 6.570509e-04 6.570509e-04
#> 3 2.208774e-03 0.0022087741 5.803942e-01 5.803942e-01
#> 4 3.823862e-01 0.3823861582 7.142112e-08 7.142112e-08
#> 5 4.434057e-01 0.4434057288 4.977765e-02 4.977765e-02
#> 6 5.619040e-01 0.9778889092 1.305771e-03 2.611541e-03
#> quant_pval_metGrp:rna.tf quant_fdr_metGrp:rna.tf quant_estimate_metGrp
#> 1 3.768097e-05 0.0001130429 -83.041219
#> 2 7.945988e-01 0.7945988318 -3.533136
#> 3 2.305241e-03 0.0023052406 -30.858474
#> 4 4.999473e-01 0.4999473203 -3.386762
#> 5 4.663539e-01 0.4663538741 -10.643704
#> 6 5.234533e-01 0.9782246238 -5.525037
#> quant_estimate_rna.tf quant_estimate_metGrp:rna.tf Model.quantile
#> 1 -2.0395999 3.8764266 Robust Linear Model
#> 2 1.3437155 0.1642009 Robust Linear Model
#> 3 0.2627746 1.8528803 Robust Linear Model
#> 4 1.3876278 0.1522536 Robust Linear Model
#> 5 1.1156152 0.5884195 Robust Linear Model
#> 6 1.2125286 0.3594288 Robust Linear Model
#> Wilcoxon_pval_target_q4met_vs_q1met Wilcoxon_pval_tf_q4met_vs_q1met
#> 1 0.42735531 0.5707504
#> 2 0.47267559 0.4726756
#> 3 0.49466484 0.9097219
#> 4 0.03763531 0.7913368
#> 5 0.44952133 0.2730363
#> 6 1.00000000 0.2730363
#> % of 0 target genes (Q1 and Q4)
#> 1 0 %
#> 2 5 %
#> 3 20 %
#> 4 0 %
#> 5 10 %
#> 6 5 %
sessionInfo()
#> R version 4.0.2 (2020-06-22)
#> Platform: x86_64-apple-darwin17.0 (64-bit)
#> Running under: macOS Catalina 10.15.6
#>
#> Matrix products: default
#> BLAS: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
#> LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
#>
#> locale:
#> [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
#>
#> attached base packages:
#> [1] stats4 parallel stats graphics grDevices utils datasets
#> [8] methods base
#>
#> other attached packages:
#> [1] BSgenome.Hsapiens.UCSC.hg19_1.4.3 BSgenome_1.56.0
#> [3] rtracklayer_1.48.0 Biostrings_2.56.0
#> [5] XVector_0.28.0 GenomicRanges_1.40.0
#> [7] GenomeInfoDb_1.24.2 IRanges_2.22.2
#> [9] S4Vectors_0.26.1 sesameData_1.6.0
#> [11] ExperimentHub_1.15.3 AnnotationHub_2.20.2
#> [13] BiocFileCache_1.12.1 dbplyr_1.4.4
#> [15] BiocGenerics_0.34.0 MethReg_0.99.11
#>
#> loaded via a namespace (and not attached):
#> [1] readxl_1.3.1 backports_1.1.10
#> [3] plyr_1.8.6 BiocParallel_1.22.0
#> [5] ggplot2_3.3.2 TFBSTools_1.26.0
#> [7] digest_0.6.25 foreach_1.5.0
#> [9] htmltools_0.5.0 GO.db_3.11.4
#> [11] magrittr_1.5 memoise_1.1.0
#> [13] doParallel_1.0.15 sfsmisc_1.1-7
#> [15] openxlsx_4.1.5 readr_1.3.1
#> [17] annotate_1.66.0 matrixStats_0.56.0
#> [19] R.utils_2.10.1 JASPAR2020_0.99.10
#> [21] prettyunits_1.1.1 colorspace_1.4-1
#> [23] blob_1.2.1 rappdirs_0.3.1
#> [25] haven_2.3.1 xfun_0.17
#> [27] dplyr_1.0.2 crayon_1.3.4
#> [29] RCurl_1.98-1.2 TFMPvalue_0.0.8
#> [31] iterators_1.0.12 glue_1.4.2
#> [33] gtable_0.3.0 sesame_1.6.0
#> [35] zlibbioc_1.34.0 DelayedArray_0.14.1
#> [37] car_3.0-9 wheatmap_0.1.0
#> [39] Rhdf5lib_1.10.1 HDF5Array_1.16.1
#> [41] abind_1.4-5 scales_1.1.1
#> [43] pscl_1.5.5 DBI_1.1.0
#> [45] rstatix_0.6.0 Rcpp_1.0.5
#> [47] xtable_1.8-4 progress_1.2.2
#> [49] foreign_0.8-80 bit_4.0.4
#> [51] preprocessCore_1.50.0 httr_1.4.2
#> [53] RColorBrewer_1.1-2 ellipsis_0.3.1
#> [55] pkgconfig_2.0.3 XML_3.99-0.5
#> [57] R.methodsS3_1.8.1 DNAcopy_1.62.0
#> [59] tidyselect_1.1.0 rlang_0.4.7
#> [61] reshape2_1.4.4 later_1.1.0.1
#> [63] AnnotationDbi_1.50.3 munsell_0.5.0
#> [65] BiocVersion_3.11.1 cellranger_1.1.0
#> [67] tools_4.0.2 DirichletMultinomial_1.30.0
#> [69] generics_0.0.2 RSQLite_2.2.0
#> [71] broom_0.7.0 evaluate_0.14
#> [73] stringr_1.4.0 fastmap_1.0.1
#> [75] yaml_2.2.1 knitr_1.29
#> [77] bit64_4.0.5 zip_2.1.1
#> [79] caTools_1.18.0 purrr_0.3.4
#> [81] randomForest_4.6-14 KEGGREST_1.28.0
#> [83] mime_0.9 R.oo_1.24.0
#> [85] poweRlaw_0.70.6 pracma_2.2.9
#> [87] compiler_4.0.2 curl_4.3
#> [89] png_0.1-7 interactiveDisplayBase_1.26.3
#> [91] ggsignif_0.6.0 tibble_3.0.3
#> [93] stringi_1.5.3 forcats_0.5.0
#> [95] lattice_0.20-41 CNEr_1.24.0
#> [97] Matrix_1.2-18 vctrs_0.3.4
#> [99] pillar_1.4.6 lifecycle_0.2.0
#> [101] BiocManager_1.30.10 data.table_1.13.0
#> [103] bitops_1.0-6 httpuv_1.5.4
#> [105] R6_2.4.1 promises_1.1.1
#> [107] rio_0.5.16 codetools_0.2-16
#> [109] MASS_7.3-53 gtools_3.8.2
#> [111] assertthat_0.2.1 seqLogo_1.54.3
#> [113] rhdf5_2.32.2 SummarizedExperiment_1.18.2
#> [115] GenomicAlignments_1.24.0 Rsamtools_2.4.0
#> [117] GenomeInfoDbData_1.2.3 hms_0.5.3
#> [119] motifmatchr_1.10.0 grid_4.0.2
#> [121] tidyr_1.1.2 rmarkdown_2.3
#> [123] carData_3.0-4 ggpubr_0.4.0
#> [125] Biobase_2.48.0 shiny_1.5.0