omplotr: 'ggplot2' Based RNAseq Plot Function Collection

Installation and loading

Install the latest version from GitHub as follow:

# Install
if(!require(devtools)) install.packages("devtools")
devtools::install_github("bioShaun/omplotr")

Theme

theme_onmath is a ggplot theme used in almost all rnaseq plots.

library(omplotr)
p <- ggplot(mtcars) + geom_point(aes(x = wt, y = mpg,colour = factor(gear)))
p + theme_onmath() + ggtitle("theme_onmath")

Plot

functions to generate plot in ngs analysis

QC

Reads GC distribution

# Fastqc GC result
head(gc_test_data, 4)
#>   X.Base     sample variable     value
#> 1      1 DZ-A-LDY-1        A 0.1644290
#> 2      2 DZ-A-LDY-1        A 0.2451466
#> 3      3 DZ-A-LDY-1        A 0.2764013
#> 4      4 DZ-A-LDY-1        A 0.3381226

# lineplot of GC distribution across Fastq file
gc_line_plot(gc_test_data)

Reads Quality barplot

# Reads Quality result
# Bars of Quality <= 30 were marked with color 'dodgerblue', 
# Bars of Quality > 30 were marked with color 'navy'.
head(rq_test_data, 4)
#>   Quality Count   Proportion      color     sample
#> 1      11    57 1.016564e-06 dodgerblue YP-B-WYX-6
#> 2      12  7352 1.311189e-04 dodgerblue YP-B-WYX-6
#> 3      13 40981 7.308736e-04 dodgerblue YP-B-WYX-6
#> 4      14 57256 1.021129e-03 dodgerblue YP-B-WYX-6

# Reads Quality barplot
reads_quality_plot(rq_test_data)

Quant

expression box, violin and density plot

# expression matrix
head(exp_test_data, 4)
#>                 9dpi-pj-jp1 9dpi-pj-jp2 9dpi-pj-jp3 9dpi-pj-jp4
#> ENSRNA049464904      64.515      48.860      34.595      25.636
#> ENSRNA049468231   17763.048   28554.280    4878.607   12802.249
#> ENSRNA049468277     544.106    1152.839     169.713     497.665
#> ENSRNA049471043    4926.117    7815.150    1198.127    4545.421

# sample information
head(test_sample_data, 4)
#>   condition      sample
#> 1 9dpi-pj-A 9dpi-pj-jp1
#> 2 9dpi-pj-A 9dpi-pj-jp2
#> 3 9dpi-pj-B 9dpi-pj-jp3
#> 4 9dpi-pj-B 9dpi-pj-jp4

# boxplot
om_boxplot(exp_test_data, test_sample_data, 'box')

# violin
om_boxplot(exp_test_data, test_sample_data, 'violin')

# density
om_boxplot(exp_test_data, test_sample_data, 'density')

# merged plot
om_boxplot(exp_test_data, test_sample_data, 'all')

expression PCA analysis point plot

om_pca_plot(exp_test_data, test_sample_data)

expression correlation heatmap

om_correlation_plot(exp_test_data, test_sample_data)

diff expression volcano plot

# diff result
head(diff_test_data, 4)
#>                  Gene_ID X9dpi.pj.jp1 X9dpi.pj.jp2 X9dpi.pj.jp3
#> 21437       Os01g0977250        4.780        4.551        7.806
#> 33806       Os03g0738600        1.209        0.946        1.338
#> 30663       Os03g0823900        5.598        5.303        6.322
#> 32700 EPlOSAG00000051674        0.000        0.000        1.482
#>       X9dpi.pj.jp4       logFC    PValue       FDR          compare
#> 21437        6.489 -0.21257899 0.3964249 0.6631611 Case1_vs_Control
#> 33806        1.392  0.03988567 0.9082303 1.0000000 Case1_vs_Control
#> 30663        7.596  0.04831892 0.8428859 0.9858939 Case1_vs_Control
#> 32700        0.000 -2.62518576 1.0000000 1.0000000 Case1_vs_Control

# plot volcano plot for a single compare
om_volcano_plot(diff_test_data, 'Case_vs_Control')

# plot volcano plot for merged results
om_volcano_plot(diff_test_data, 'ALL')

expression heatmap

# plot expression heatmap
om_heatmap(exp_test_data, test_sample_data)

expression cluster line plot

# cluster result
head(cluster_test_data, 4)
#>     cluster            Gene_id    variable     value
#> 1 cluster_1 EPlOSAG00000008604 9dpi-pj-jp1 0.4935853
#> 2 cluster_1 EPlOSAG00000018483 9dpi-pj-jp1 0.5236810
#> 3 cluster_1 EPlOSAG00000027962 9dpi-pj-jp1 0.5008437
#> 4 cluster_1 EPlOSAG00000045132 9dpi-pj-jp1 0.4879498

# cluster plot
om_cluster_plot(cluster_test_data)

Enrichment analysis

GO enrichment analysis

# diff genes
test_diff_genes <- go_test_data_list[['test_diff_genes']]
head(test_diff_genes, 4)
#> [1] "Os01g0150000" "Os01g0172600" "Os01g0220700" "Os01g0303600"

# gene length
test_gene_len <- go_test_data_list[['test_gene_len']]
head(test_gene_len, 4)
#>        gene_id gene_len
#> 1 Os01g0290700     2289
#> 2 Os01g0249200     5219
#> 3 Os01g0152200     3747
#> 4 Os01g0295700     5836

# get go annotation file
gene_go_map <- system.file("extdata", "topgo_test_data.txt", package = "omplotr")
gene_go_map_df <- data.table::fread(gene_go_map, header = F)
head(gene_go_map_df, 4)
#>              V1                               V2
#> 1: Os01g0166300                       GO:0017176
#> 2: Os01g0296700 GO:0016787,GO:0005975,GO:0004553
#> 3: Os01g0290700            GO:0005524,GO:0016887
#> 4: Os01g0236400            GO:0005737,GO:0005856

# run goseq and show result
goseq_output <- om_goseq(test_diff_genes, test_gene_len, gene_go_map)
head(goseq_output, 4)
#>       category over_represented_pvalue    qvalue numDEInCat numInCat
#> 571 GO:0010287             0.008881002 0.3812842          2        2
#> 848 GO:0042803             0.008933210 0.3812842          2        2
#> 76  GO:0001172             0.024128606 0.3812842          2        3
#> 123 GO:0003968             0.024128606 0.3812842          2        3
#>                                     term ontology
#> 571                        plastoglobule       CC
#> 848    protein homodimerization activity       MF
#> 76          transcription, RNA-templated       BP
#> 123 RNA-directed RNA polymerase activity       MF
#>                         DE_id
#> 571 Os01g0173000,Os01g0118000
#> 848 Os01g0235200,Os01g0251000
#> 76  Os01g0198000,Os01g0198100
#> 123 Os01g0198000,Os01g0198100

# go enrichment bar plot
go_enrich_file <- system.file("extdata", "enrichment", 
                              "example.go.enrichment.txt", 
                              package = "omplotr")
go_enrich_list <- clean_enrich_table(go_enrich_file)
om_enrich_bar_plot(go_enrich_list$table, ylab_title=go_enrich_list$title)

GO DAG graph

# run topGO
om_topgo(gene_go_map, test_diff_genes, goseq_output)

Mapping

mapping_stats <- system.file("extdata", "all_sample.mapping.xls", package = "omplotr")

# show mapping stats table
mapping_df <- read.delim(mapping_stats)
head(mapping_df, 4)
#>                       Item        A        F1       K1        M1        M
#> 1               sequences: 95851866 102408480 88220198 102921732 89688164
#> 2            reads mapped: 94204523  98758455 87279503 101428039 88480886
#> 3 reads mapped and paired: 93189512  98569864 87122868 101234396 87992732
#> 4          reads unmapped:  1647343   3650025   940695   1493693  1207278
#>          W
#> 1 89827664
#> 2 88521129
#> 3 88075758
#> 4  1306535

# show mapping summary
om_bwa_mapping_plot(mapping_stats, 5)

# show detail sample information
om_bwa_mapping_plot(mapping_stats, 10)

Reads Coverage

cov_stats <- system.file("extdata", "all_sample.genome.cov.xls", package = "omplotr")

# show coverage table
cov_df <- read.delim(cov_stats)
head(cov_df, 4)
#>   Depth      A     F1     K1     M1      M      W
#> 1     1 876435 764784 944681 786463 814983 857249
#> 2     2 630273 573112 761490 564038 600328 635532
#> 3     3 553156 479804 677398 484714 513316 544938
#> 4     4 523375 436214 652812 453986 470339 503152

# show coverage summary
om_reads_cov_plot(cov_stats, 5, 100)

# show detail sample information
om_reads_cov_plot(cov_stats, 10, 100)

Variant Distribution

# transform variant summary data
stats_names <- om_const_reseq_variant[['var_file_labs']][1:2]
stats_dir <- system.file("extdata", "variant_stats", 
                         package = "omplotr")
impact_map_file <- system.file("extdata", "variant_stats", "snpeff_varEffects.csv", 
                               package = "omplotr")
test_pie_stats <- lapply(stats_names, om_var_pie_stats, 
                         stats_dir=stats_dir, 
                         impact_map_file=impact_map_file)
head(test_pie_stats[[1]], 4)
#>   Type variable      value Percentage     fig
#> 1  DEL        A 0.09495793      9.50% varType
#> 2  INS        A 0.09794564      9.79% varType
#> 3  SNP        A 0.80709642     80.71% varType
#> 4  DEL       F1 0.09068346      9.07% varType

# plot pie plot for each sample
a_om_test_var_stats <- dplyr::filter(om_test_var_stats[[1]], variable == 'A')
om_var_pie_plot(a_om_test_var_stats)

# variant summary boxplot
om_test_var_stats_df <- plyr::ldply(om_test_var_stats, data.frame)
om_var_summary_plot(om_test_var_stats_df)