/ceres

Computational correction of copy-number effect in CRISPR-Cas9 essentiality screens

Primary language: R. License: BSD 3-Clause "New" or "Revised" License (BSD-3-Clause).

CERES

Computational correction of copy-number effect in CRISPR-Cas9 essentiality screens

Installation instructions

You will need several packages available on Bioconductor before installing CERES. To install these, run:

# biocLite() is no longer supported on R >= 3.5; BiocManager is the current
# Bioconductor installer. Install it first if it is not already available.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}

# Extra arguments such as `type` are forwarded to install.packages().
BiocManager::install(
  c("Biostrings", "Rsamtools",
    "GenomeInfoDb", "BSgenome",
    "BSgenome.Hsapiens.UCSC.hg19", "GenomicRanges"),
  type = "source"
)

If the devtools package is not already installed, install from the R console:

install.packages("devtools")

To install CERES, either run:

devtools::install_github("cancerdatasci/ceres")

or clone the ceres repository, navigate to the parent directory of the local copy, and run from the R console:

devtools::install("ceres")

Note that if C++11 support is not already enabled, you may need to run

Sys.setenv("PKG_CXXFLAGS"="-std=c++11")

prior to running the install command.

Preparing CERES inputs also requires the bowtie and samtools command line tools. For OSX users with homebrew installed on their machine, these can be installed from the command line:

brew tap brewsci/science
brew install bowtie
brew install samtools

Run CERES on example data

Download the zipped example files from depmap.org/ceres and extract them into a directory (e.g. ./data/download). If you have not already fetched or built them yourself, you should also separately download the necessary hg19 bowtie indices and place the unzipped files in the bowtie_indexes directory of the example folder.

The data in the example files are from screens of 33 cancer cell lines published in Aguirre et al. 2016 and 14 AML lines published in Wang et al. 2017.

Run the example script below, ensuring that the data_dir variable points to the directory with the data download.

library(ceres)

### Setup

# Location of the extracted example download; change this to match your
# machine.
data_dir <- "./data/download"

# Per-screen inputs: dependency data (.gct) and replicate map (.tsv).
gecko_dep_file <- file.path(data_dir, "Gecko.gct")
gecko_rep_map  <- file.path(data_dir, "Gecko_replicate_map.tsv")
wang_dep_file  <- file.path(data_dir, "Wang2017.gct")
wang_rep_map   <- file.path(data_dir, "Wang2017_replicate_map.tsv")

# Inputs shared by both screens: copy-number segments and gene annotations.
cn_seg_file     <- file.path(data_dir, "CCLE_copynumber_2013-12-03.seg.txt")
gene_annot_file <- file.path(data_dir, "CCDS.current.txt")

# Point bowtie at the bundled index directory. This line can be removed when
# the BOWTIE_INDEXES environment variable is already set and its directory
# contains the hg19 index.
Sys.setenv(BOWTIE_INDEXES = file.path(data_dir, "bowtie_indexes"))



### Run CERES on Gecko data

# Inputs for this run live in a sub-directory named after today's date.
gecko_inputs_dir <- file.path("./data/gecko_ceres_inputs", Sys.Date())

# Prepare the CERES inputs for autosomes chr1..chr22; the .Rds files read by
# wrap_ceres() below are expected to be written into gecko_inputs_dir.
prepare_ceres_inputs(
  inputs_dir = gecko_inputs_dir,
  dep_file = gecko_dep_file,
  cn_seg_file = cn_seg_file,
  gene_annot_file = gene_annot_file,
  rep_map_file = gecko_rep_map,
  chromosomes = sprintf("chr%d", 1:22),
  dep_normalize = "zmad"
)

# Fit CERES from the prepared input files.
gecko_ceres <- wrap_ceres(
  sg_path = file.path(gecko_inputs_dir, "guide_sample_dep.Rds"),
  cn_path = file.path(gecko_inputs_dir, "locus_sample_cn.Rds"),
  guide_locus_path = file.path(gecko_inputs_dir, "guide_locus.Rds"),
  locus_gene_path = file.path(gecko_inputs_dir, "locus_gene.Rds"),
  replicate_map_path = file.path(gecko_inputs_dir, "replicate_map.Rds"),
  run_id = "Gecko",
  params = list(lambda_g = 0.68129207)
)

# Rescale the fitted gene-level results with scale_to_essentials().
gecko_ceres_scaled <- scale_to_essentials(
  gecko_ceres$gene_essentiality_results$ge_fit
)


### Run CERES on Wang2017 data

# Inputs for this run live in a sub-directory named after today's date.
wang_inputs_dir <- file.path("./data/wang_ceres_inputs", Sys.Date())

# Prepare the CERES inputs for autosomes chr1..chr22; the .Rds files read by
# wrap_ceres() below are expected to be written into wang_inputs_dir.
prepare_ceres_inputs(
  inputs_dir = wang_inputs_dir,
  dep_file = wang_dep_file,
  cn_seg_file = cn_seg_file,
  gene_annot_file = gene_annot_file,
  rep_map_file = wang_rep_map,
  chromosomes = sprintf("chr%d", 1:22),
  dep_normalize = "zmad"
)

# Fit CERES from the prepared input files.
wang_ceres <- wrap_ceres(
  sg_path = file.path(wang_inputs_dir, "guide_sample_dep.Rds"),
  cn_path = file.path(wang_inputs_dir, "locus_sample_cn.Rds"),
  guide_locus_path = file.path(wang_inputs_dir, "guide_locus.Rds"),
  locus_gene_path = file.path(wang_inputs_dir, "locus_gene.Rds"),
  replicate_map_path = file.path(wang_inputs_dir, "replicate_map.Rds"),
  run_id = "Wang2017",
  params = list(lambda_g = 0.68129207)
)

# Rescale the fitted gene-level results with scale_to_essentials().
wang_ceres_scaled <- scale_to_essentials(
  wang_ceres$gene_essentiality_results$ge_fit
)