/QAFFPs

Files and scripts used for the generation of QAFFPs

Primary language: Python. License: MIT.

QAFFPs

Files and scripts used for the generation of QAFFPs

First, set up the connection to the ChEMBL database (https://www.ebi.ac.uk/chembl/) running on a PostgreSQL server in the config.py file, then set up the Python 2.7 virtual environment using the requirements.txt file (pip install -r requirements.txt).

You also need to install RDKit (https://www.rdkit.org/) with Python bindings, and the caret package for R (http://caret.r-forge.r-project.org/).

To run the workflow for the generation of QAFFPs, execute the `python run.py` command in your console (see the code below).

from __future__ import print_function
import os, subprocess
from subprocess import CalledProcessError, check_output

import chembl_wrapper, data, predictions, utils, config as cfg, qaffps as qaffps_lib

def run():
    """Run the complete QAFFP generation workflow for the example ligand set.

    The steps are executed in order: export target sets and their
    fingerprints, split the data with ``data_split.r``, build one QSAR model
    per target set, collect model statistics, prepare and predict the
    ``example_set`` ligands, and finally generate and read back the QAFFPs
    and b-QAFFPs.

    Raises:
        Exception: if ``Rscript data_split.r`` exits with a non-zero status.
            The captured output of the script is printed before raising.
    """
    # Export target sets from ChEMBL
    data.export_target_sets()

    # Export Morgan2 fingerprints for target sets
    data.export_fingerprints_for_target_sets()

    # Generate train/test sets
    try:
        # check_output captures the script's stdout, so it has to be printed
        # explicitly; universal_newlines=True returns it as text on both
        # Python 2.7 and Python 3.
        output = check_output(
            ["Rscript", "--vanilla", "data_split.r", cfg.DIRS["FPS"], cfg.DIRS["QSAR_SETS"]],
            universal_newlines=True
        )
    except CalledProcessError as e:
        # Show what the R script printed before failing; otherwise the error
        # message below would refer to output that was never displayed.
        print(e.output)
        raise Exception("ERROR IN data_split.r. See the output above.")
    print(output)

    # Build QSAR models
    print("\nBuild QSAR models for all target sets...")
    # File names are reduced to the part before the first dot and
    # de-duplicated; sorting makes the processing order reproducible.
    target_sets = sorted({x.split(".")[0] for x in os.listdir(cfg.DIRS["TARGET_SETS"])})
    count = len(target_sets)

    for i, target_set in enumerate(target_sets, 1):
        print("{}/{}".format(i, count))
        predictions.build_qsar_models(target_set)

    # Get QSAR models stats
    data.get_qsar_models_stats()

    # Export Morgan2 fingerprints for a ligand set
    ligand_csv = os.path.join(cfg.DIRS["LIGAND_SETS"], "example_set.csv")
    ligand_fps = os.path.join(cfg.DIRS["LIGAND_SETS"], "example_set.fps")
    utils.prepare_ligand_set_from_set_file(
        input_file=ligand_csv,
        output_file=ligand_fps
    )

    # Predict the ligand set on all models
    predictions.predict_ligands_on_all_models(
        ligands_file=ligand_fps,
        r20_cutoff=0.6,
        q2_cutoff=0.5
    )

    # Generate QAFFPs for the ligand set
    qaffps_lib.generate_qaffps(ligand_set_name="example_set", confidence=90, max_dev=2)

    # Get QAFFPs for the ligand set
    qaffps = qaffps_lib.get_qaffps(ligand_set_name="example_set")

    # Get b-QAFFPs for the ligand set
    bqaffps = qaffps_lib.get_bqaffps(ligand_set_name="example_set", cutoff=5)
    
# Script entry point: start the workflow only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    run()