R scripts for image ploting.
plot.blocks.R
Plot blocks (genome segments, centromere region or gene location)
# Prepare input data
perl block2draw.pl -t 5 -d 5 \
-s samples.list \ # sorted sample list
-b blocks.csv \ # block file
-g gene.bed \ # gene info, optional
-c centromere.bed \ # centromere position info, optional
-p test.draw.pos.csv \ # for ploting
> test.draw.csv # for ploting
# Plot blocks
Rscript plot.blocks.R \
test.draw.csv \
test.draw.pos.csv \
N/N,D/N,D/D,Gene,Centromere \ # legend names in order
15,100 \ # width and height
test.draw.pdf
stacked_histogram.error_bar.R
Plot stacked histogram with error bars and compare two sets of them. (Used in my paper Jia et al., 2020)
library(lattice)
library(plyr)
library(ggplot2)
library(Rmisc)
library(ggthemes)
library(tidyverse)
node.final <- read.table("node_stats.se.ac.txt", header=T, sep = "\t")
node.final$Varieties <- factor(node.final$Varieties,levels=c('SLX','CN422','ZN4','AZZ','AJNT','IR36','MH63',
'HHZ','93-11','PA64s','H1','H2','H3'))
node.final$Noden <- factor(node.final$Noden,levels=c('Node6','Node5','Node4','Node3','Node2','Node1'))
ggplot(node.final, aes(x=Varieties, y=Length, fill=Noden)) +
geom_bar(stat="identity",
colour="black",# Use black outlines,
size=.25) +
ylab("Node length")+
xlab("")+
geom_errorbar(aes(ymin=Ac_len-se, ymax=Ac_len+se), width=.15) + # Width of the error bars
facet_grid( .~ Group, scales = "free_x",space="free_x") +
theme(strip.text=element_text(face="bold",size=rel(1)),
panel.border = element_blank(),
axis.text = element_text(face = 'bold',size = rel(1),colour="black"),
axis.text.x = element_text(angle = 45, hjust = 0.3, vjust = 0.5),
axis.title = element_text(face = 'bold',size = rel(1)),
axis.line.x=element_line(colour="black"),
strip.background=element_rect(fill="#CFCFCF"),
panel.spacing=unit(0.5,"cm"),panel.background=element_rect(fill="#EBEBEB"))
ggsave(filename = "Nodes.length.pdf", dpi = 300, device = "pdf")
################################################################################
#compare 9311 and H1
node.c <- read.table("9311.H1.txt", header=T, sep = "\t")
node.c$Varieties <- factor(node.c$Varieties,levels=c('93-11','H1'))
node.c$Noden <- factor(node.c$Noden,levels=c('Node6','Node5','Node4','Node3','Node2','Node1'))
length <- read.table("9311.H1.ac_len.txt", header=T, sep = "\t")
ggplot(node.c, aes(x=Varieties, y=Length, fill=Noden)) +
geom_bar(stat="identity", colour="black", width = 0.5) +
geom_errorbar(aes(ymin=Ac_len-se, ymax=Ac_len+se), width=.15)+
#facet_grid( .~ Group, scales = "free_x",space="free_x")+
geom_segment(data=length, aes(x=1.25, xend=1.75, y=a1, yend=a2))+
ylab("")+
theme(strip.text=element_text(face="bold",size=rel(1.5)),
panel.border = element_blank(),
axis.text = element_text(face = 'bold',size = rel(1.5),colour="black"),
axis.title = element_text(face = 'bold',size = rel(1.5)),
axis.title.x = element_blank(),
legend.text = element_text(face = 'bold',size = rel(1)),
legend.title = element_text(face = 'bold',size = rel(1)),
axis.line.x=element_line(colour="black"),
strip.background=element_rect(fill="#CFCFCF"),
panel.spacing=unit(0.5,"cm"),panel.background=element_rect(fill="#EBEBEB"))
ggsave(filename = "9311.H1.ac_len.pdf", width=3.5, device = "pdf")
plot_multi_ordinates.R
Plot multiple ordinates image. (Used in my paper Jia et al., 2019)
#!plot CO(line), TE number(line), markers(bar)
library("ggplot2") # for the plot
library("ggrepel") # for spreading text labels on the plot
library("reshape2")
rm(list=ls())
data_sum <- read.table("summary.all.20180919.txt", header = T, sep = "\t")
data_mod <- melt(data_sum[,c(1,2,3,4,5)], id.vars=c("chr", "start", "end"), variable.name = "co_marker", value.name = "num")
centrome <- read.table("centromere.bed", sep = "\t", col.names = c("chr", "start", "end"))
sd1 <- read.table("SD1.bed", sep = "\t", col.names = c("chr", "start", "end", "name"))
ggplot(data=data_sum) +
facet_grid(chr~.)+
geom_area(data =data_sum, aes(x=(start + end)/2/1000000, y = te_prop),color = "#FFCC33", fill = "#FFCC33", stat = "identity")+
theme_bw() +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())
pdf(file="multi_ordinates.pdf", width = 12, height = 16)
ggplot(data=data_mod) +
geom_rect(data = sd1, aes(xmin=start/1000000, xmax=end/1000000, ymax=30, ymin=0), fill="#6600FF")+
geom_area(data =data_sum, aes(x=(start + end)/2/1000000, y = te_prop*30),color = "#FFCC33", fill = "#FFCC33", stat = "identity")+
geom_line(aes(x = (start + end)/2/1000000, y = num, color = co_marker), size = 0.5) +
#geom_line(aes(x = (start + end)/2/1000000, y = marker_den, color = "#085A9C"), size = 0.5) +
geom_rect(data = centrome, aes(xmin=start/1000000, xmax=end/1000000, ymax=30, ymin=0), fill="grey")+
geom_text_repel(data = sd1, aes(x = (start + end)/2/1000000, y = 30, label = name),
color = "black", fontface = "italic",show.legend = FALSE) +
scale_fill_discrete(guide=FALSE)+
scale_y_continuous(name = expression("Numbers"), sec.axis = sec_axis(~. * 1/ 30 , name = "Transposon proportion"), limits = c(0, 30)) +
scale_x_continuous(expand=c(0,0), breaks=seq(0, max((data_sum$start + data_sum$end)/2/1000000), 5)) +
facet_grid(chr~.)+
xlab("Length(Mbp)")+
scale_colour_manual(values=c("#EF0808","#085A9C")) +
#scale_x_continuous(labels = comma) +
theme_bw() +
theme(legend.position="none",
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
strip.text=element_text(face="bold",size=rel(1.5)),
axis.text = element_text(face = 'bold',size = rel(1.5),colour="black"))+
labs(title = "COs, TEs and markers distribution along chromosomes")
dev.off()
plot_break_axis.R
Plot histogram with break y axis.
library(Rmisc)
library(ggplot2)
library(ggthemes)
library(reshape2)
library(ggsci)
library(ggpubr)
rm(list=ls())
df <- read.table("plot_break_axis.txt", header = T, sep = "\t")
df.mod <- melt(df, id.vars = c('Type',"Repeat"), variable.name = 'Treat', value.name = 'Value')
df.mod.se <- summarySE(df.mod, measurevar="Value", groupvars=c("Type","Treat"))
p1 <- ggplot(df.mod.se,aes(x=Treat,y=Value,fill=Type)) +
geom_bar(position="dodge", stat="identity",width=.8, size=.25) +
geom_errorbar(aes(ymin=Value-se, ymax=Value+se), position=position_dodge(0.8), width=.25, size=0.5) +
labs(x=NULL,y=NULL,fill=NULL)+
theme_few()+
guides(fill=FALSE)+
scale_fill_lancet()+
coord_cartesian(ylim = c(0,4))+
scale_y_continuous(breaks = seq(0,4,1))
p2 <- ggplot(df.mod.se,aes(x=Treat,y=Value,fill=Type)) +
geom_bar(position="dodge", stat="identity",width=.8, size=.25) +
geom_errorbar(aes(ymin=Value-se, ymax=Value+se), position=position_dodge(0.8), width=.25, size=0.5) +
labs(x=NULL,y=NULL,fill=NULL) +
theme_few()+
guides(fill=FALSE)+
scale_fill_lancet()+
theme(axis.text.x = element_blank(),axis.ticks.x = element_blank()) +
coord_cartesian(ylim = c(15,45)) +
scale_y_continuous(breaks = seq(15,45,10))
ggarrange(p2,p1,heights=c(2/5, 3/5),ncol = 1, nrow = 2,common.legend = TRUE,legend="right",align = "v")
ggsave(file = "plot_break_axis.pdf", height = 3.2, width = 3, dpi = 300, device = "pdf")
plot_pie_donut.R
library(ggplot2)
data <- data.frame(category = c('A','B','C','D','E'),
count = c(10,23,8,33,12))
data$category <- factor(data$category,levels=rev(c('A','B','C','D','E')))
# Calculate proportion
data$fraction <- round(data$count / sum(data$count), 2) * 100
# Compute the cumulative percentages (top of each rectangle)
data$ymax <- cumsum(data$fraction)
# Compute the bottom of each rectangle
data$ymin <- c(0, head(data$ymax, n=-1))
# Compute label position
data$labelPosition <- data$fraction/2 + data$ymin
# Generate labels
data$label <- paste0(data$fraction, "%")
# Define blank theme
blank_theme <- theme_minimal()+
theme(
axis.title.x = element_blank(),
axis.title.y = element_blank(),
axis.text.x = element_blank(),
axis.text.y = element_blank(),
panel.border = element_blank(),
panel.grid=element_blank(),
axis.ticks = element_blank(),
plot.title=element_text(size=14, face="bold")
)
# Pie and donut ploting
ggplot(data=data, mapping=aes(x=2,y=fraction,fill=category))+
geom_bar(stat="identity",color="white", width =1, linetype=1,size=1)+
geom_text(x=2, aes(y=labelPosition, label=label), size=4)+
blank_theme+
coord_polar(theta = "y")+
xlim(1.5, 2.5) # Pie plot
# xlim(1.5, 2.5) # Donut plot
# Note: An alternative way to plot pie or donut is by geom_rect() and coord_polar().
plot_heatmap.R
library(ggplot2)
library(reshape2)
library(ggthemes)
rm(list = ls())
data <- read.table("heatmap_data.txt", header = T, sep = "\t")
data.m <- melt(data)
data.m$Gene <- factor(data.m$Gene,levels=rev(c("A","B","C","D","E","F","G","H","I","J","K")))
data.m$variable <- factor(data.m$variable,levels=c("WT","Mutant"))
data.m$Treat <- factor(data.m$Treat,levels=c("With N","No N"))
ggplot(data.m, aes(variable, Gene)) +
facet_grid( .~ Treat)+
geom_tile(aes(fill = value),colour = "white") +
scale_fill_gradient2('log2(TPM)',low = "blue", high = "red", mid = 'white')+
labs(x = "", y = "")+
theme(axis.text.x = element_text(vjust = 0.5, hjust = 0.5, angle = 45)) +
coord_fixed(ratio=1)
ggsave(file = "heatmap.pdf", height = 6, width = 3, dpi = 300, device = "pdf")
# Note: we can also add more theme modifications to beautify the final image, like, coord_flip().
plot_volcano_diagram.R
library(ggplot2)
library(ggthemes)
library(ggrepel)
deg <- read.table("volcano_plot.txt", sep = "\t", header = T)
deg$Change <- as.factor(ifelse(deg$pvalue < 0.05 & deg$log2FoldChange > 1, 'Up',
ifelse(deg$pvalue < 0.05 & deg$log2FoldChange < -1, 'Down', 'Normal')))
deg_tag <- subset(deg, abs(log2FoldChange)>2 & pvalue<1E-15)
ggplot(deg, aes(x = log2FoldChange, y = -log10(pvalue)))+
geom_point(data = deg_tag, size=2, color = "black") +
geom_point(alpha = 0.7,aes(color = Change)) +
#scale_colour_brewer(palette = "Set1")+
scale_color_manual(values = c("#008CD6","gray", "#E50044"))+
geom_hline(yintercept = -log10(0.05), linetype = 3) +
geom_vline(xintercept = c(-1,1), linetype = 3) +
geom_text_repel(data = deg_tag, aes(x = log2FoldChange, y = -log10(pvalue), label = gene_id),
color = "black", fontface = "italic",show.legend = FALSE)+
labs(x=expression(paste(log[2], "(", "Fold change", ")", sep="")),
y=expression(paste(-log[10], "(", italic(P), " ", value, ")", sep="")))+
theme_bw()
ggsave(filename = "ARF3_data.pdf", heigh = 6, width =6, device = "pdf")
P.S. We all love ggplot2! A great tutorial for ggplot2: Top50-Ggplot2-Visualizations.
PP.S.These R scripts are frequently-used in my data visualization. I will update more. @2021-06-09
Xianqing Jia jiaxq.nju@gmail.com