Test: plotting a treemap with R
- https://www.r-graph-gallery.com/234-a-very-basic-treemap.html
- customize labels & colors: https://www.r-graph-gallery.com/236-custom-your-treemap.html
- also possible in Python:
conda create -n treemap -c conda-forge r-treemap
# Minimal example: draw a treemap from hand-typed lineage counts.
library(treemap)

# Example data: one count per lineage.
lineage <- c("B.1.1.7", "B.1.351", "A.27")
count <- c(27, 5, 9)
data <- data.frame(lineage = lineage, count = count)

# Render the treemap into treemap.pdf: rectangles are indexed by the
# "lineage" column and sized by the "count" column.
pdf(file = "treemap.pdf")
treemap(
  data,
  index = "lineage",
  vSize = "count",
  type = "index"
)
dev.off()
# Directory holding the pangolin assignments, and the date used to tag the output file.
DIR='/scratch_slow/sc2/global/2021-04-11/clades/'
DATE='2021-05-09'
# Take CSV column 2 (the lineage), skip header rows (exact match on the column
# name instead of a substring grep), count each lineage and write
# "lineage<TAB>count" lines. Expansions are quoted so paths with spaces survive.
awk -F',' '$2 != "lineage" {print $2}' "$DIR/clades-pangolin.csv" | sort | uniq -c | awk '{print $2"\t"$1}' > "${DATE}_sc2-pangolin.tsv"
Format of the sc2-pangolin.tsv file:
to work with the following R code it must be tab-separated with two columns (lineage, count) and no header row:
B.1.1.7 24
P.1 3
A.27 12
# Plot a treemap of the lineage counts in <date>_sc2-pangolin.tsv and save it
# as <date>_treemap.pdf.
library(treemap)

date <- "2021-05-09"

# Input is tab-separated without a header row, so read.csv names the columns
# V1 (lineage) and V2 (count).
data <- read.csv(paste0(date, "_sc2-pangolin.tsv"), sep = "\t", header = FALSE)

# Hand-typed example data (note: its columns would be lineage/count, not V1/V2):
#lineage <- c("B.1.1.7","B.1.351","A.27")
#count <- c(27,5,9)
#data <- data.frame(lineage,count)

# Totals reported in the plot title.
num_lineages <- nrow(data)
num_sequences <- sum(data$V2)

plot_title <- paste0(
  "Occurrence of SARS-CoV-2 lineages in Germany, ", date,
  " (", num_sequences, " sequences & ", num_lineages, " different lineages)"
)

# Draw into the PDF device; `finally` closes the device even if treemap()
# fails, so an error does not leave a dangling open graphics device.
pdf(paste0(date, "_treemap.pdf"))
tryCatch(
  treemap(
    data,
    index = "V1",
    vSize = "V2",
    type = "index",
    # Label size, one value per level of aggregation (group, subgroup, ...).
    fontsize.labels = 10,
    #fontcolor.labels=c("white","orange"), # Color of labels
    # Label font face: 1, 2, 3, 4 for normal, bold, italic, bold-italic.
    fontface.labels = 1,
    # Background color of labels.
    bg.labels = "transparent",
    # Where to place labels inside each rectangle.
    align.labels = list(
      #c("center", "center")
      c("left", "top")
    ),
    # Overlap tolerance between labels, between 0 and 1: 0 means lower-level
    # labels are not printed if higher-level labels overlap, 1 means labels
    # are always printed; 0.5 is the default.
    overlap.labels = 0.5,
    # If TRUE, labels grow with their rectangle.
    inflate.labels = FALSE,
    # Color palette: an RColorBrewer preset name or a custom vector of colors.
    palette = "Set2",
    title = plot_title,
    # Size of the title.
    fontsize.title = 9
  ),
  finally = dev.off()
)
# Copy every row whose first field is not B.1.1.7 into a separate TSV.
awk '$1 != "B.1.1.7"' 2021-05-09_sc2-pangolin.tsv > 2021-05-09_sc2-pangolin-no-B117.tsv