PTa Analysis TOolkit
Includes quality control (QC) and filtering of SNVs, indels, SVs and CNVs.
- Nextflow
- Singularity
- R/4.1.2
- BSgenome
- copynumber
- cowplot
- ggplot2
- gtools
- MutationalPatterns
- randomForest
- scales
- StructuralVariantAnnotation
- VariantAnnotation
- Install Nextflow
- Install Singularity
- Pull/Clone PTATO
- Get & configure resources
- Configure nextflow
- Configure processes
git clone git@github.com:ToolsVanBox/PTATO.git
This section describes how to run the workflow.
Always keep these lines in your run.config file:
includeConfig "${projectDir}/configs/process.config"
includeConfig "${projectDir}/configs/nextflow.config"
includeConfig "${projectDir}/configs/resources.config"
All of the parameters in the params section can be supplied on the command line or pre-filled in the run.config file.
params {
run {
snvs = true
QC = false
svs = false
indels = false
cnvs = false
}
// TRAINING
train {
version = '2.0.0'
}
pta_vcfs_dir = ''
nopta_vcfs_dir = ''
// END TRAINING
// TESTING
input_vcfs_dir = ''
bams_dir = ''
// END TESTING
out_dir = ''
bulk_names = [
['donor_id', 'sample_id'],
]
snvs {
rf_rds = "${projectDir}/resources/hg38/snvs/randomforest/randomforest_v1.0.0.rds"
}
indels {
rf_rds = ''
excludeindellist = "${projectDir}/resources/hg38/indels/excludeindellist/PTA_Indel_ExcludeIndellist_normNoGTrenamed.vcf.gz"
}
optional {
germline_vcfs_dir = ''
short_variants {
somatic_vcfs_dir = ''
walker_vcfs_dir = ''
phased_vcfs_dir = ''
ab_tables_dir = ''
context_beds_dir = ''
features_beds_dir = ''
}
snvs {
rf_tables_dir = ''
ptato_vcfs_dir = ''
}
indels {
rf_tables_dir = ''
ptato_vcfs_dir = ''
}
qc {
wgs_metrics_dir = ''
alignment_summary_metrics_dir = ''
}
svs {
gridss_driver_vcfs_dir = ''
gridss_unfiltered_vcfs_dir = ''
gripss_somatic_filtered_vcfs_dir = ''
gripss_filtered_files_dir = ''
integrated_sv_files_dir = ''
}
cnvs {
cobalt_ratio_tsv_dir = ''
cobalt_filtered_readcounts_dir = ''
baf_filtered_files_dir = ''
}
}
}
Create a run.config file that looks like this:
params {
run {
snvs = true
QC = true
svs = true
indels = true
cnvs = true
}
// TRAINING
train {
version = '2.0.0'
}
pta_vcfs_dir = ''
nopta_vcfs_dir = ''
// END TRAINING
// TESTING
input_vcfs_dir = '/path/to/vcfs_dir/'
bams_dir = '/path/to/bams_dir/'
// END TESTING
out_dir = ''
bulk_names = [
['Donor_1', 'mycontrol'],
]
snvs {
rf_rds = "${projectDir}/resources/hg38/snvs/randomforest/randomforest_v1.0.0.rds"
}
indels {
rf_rds = ''
excludeindellist = "${projectDir}/resources/hg38/indels/excludeindellist/PTA_Indel_ExcludeIndellist_normNoGTrenamed.vcf.gz"
}
optional {
germline_vcfs_dir = ''
short_variants {
somatic_vcfs_dir = ''
walker_vcfs_dir = ''
phased_vcfs_dir = ''
ab_tables_dir = ''
context_beds_dir = ''
features_beds_dir = ''
}
snvs {
rf_tables_dir = ''
ptato_vcfs_dir = ''
}
indels {
rf_tables_dir = ''
ptato_vcfs_dir = ''
}
qc {
wgs_metrics_dir = ''
alignment_summary_metrics_dir = ''
}
svs {
gridss_driver_vcfs_dir = ''
gridss_unfiltered_vcfs_dir = ''
gripss_somatic_filtered_vcfs_dir = ''
gripss_filtered_files_dir = ''
integrated_sv_files_dir = ''
}
cnvs {
cobalt_ratio_tsv_dir = ''
cobalt_filtered_readcounts_dir = ''
baf_filtered_files_dir = ''
}
}
}
Run the workflow on Slurm:
nextflow run ptato.nf -c run.config --out_dir /processed_data/ptato/ -profile slurm -resume
/path/to/vcfs_dir
./Donor_1
./myfile.vcf(.gz)
/path/to/bams_dir
./Donor_1
./mycontrol.bam
./mysample1.bam
./mysample2.bam
...