Codes applied for the roadmap data analysis part.
If you see '-bash: shuf: command not found' run following code:
brew install coreutils
for tg in *.tagAlign.gz
do
zless $tg | shuf -n 15000000 | bedtobam -i - -g hg19.txt | samtools sort -o "${tg%%.*}".sorted.bam
samtools index "${tg%%.*}".sorted.bam
done
for file in *.sorted.bam; do samtools idxstats "${file%%.*}".sorted.bam | cut -f3 | awk 'BEGIN {total=0} {total += $1} END {print total}'; done
require(TxDb.Hsapiens.UCSC.hg19.knownGene)
# To avoid have to type the whole package name every time, we use the variable name txdb
txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
pms <- promoters(genes(txdb), upstream = 5000, downstream = 5000)
gr<- unlist(tile(pms, width=100))
df <- data.frame(seqnames=seqnames(gr),
starts=start(gr),
ends=end(gr),
names=c(rep(".", length(gr))),
scores=c(rep(".", length(gr))),
strands=strand(gr))
write.table(df, file="foo.bed", quote=F, sep="\t", row.names=F, col.names=F)