QAFFPs

Files and scripts used for the generation of QAFFPs

First, you need to set up the connection to the ChEMBL database (https://www.ebi.ac.uk/chembl/) running on a PostgreSQL server in the config.py file, and set up a Python 2.7 virtual environment using the requirements.txt file (pip install -r requirements.txt).

You also need to install RDKit (https://www.rdkit.org/) with Python bindings, and the caret package for R (http://caret.r-forge.r-project.org/).

To run the workflow for the generation of QAFFPs, execute the python run.py command in your console (see the code below).

from __future__ import print_function
import os, subprocess
from subprocess import CalledProcessError, check_output

import chembl_wrapper, data, predictions, utils, config as cfg, qaffps as qaffps_lib

def _print_process_output(raw_output):
    """Print the captured stdout of a child process, if there is any."""
    if not raw_output:
        return
    # check_output returns a byte string; on Python 2 that is already `str`,
    # on Python 3 it has to be decoded before printing.
    if not isinstance(raw_output, str):
        raw_output = raw_output.decode("utf-8", "replace")
    print(raw_output)


def run():
    """Run the whole QAFFP generation workflow for the example ligand set.

    The steps are executed in this order:

    1. export target sets and their fingerprints (``data`` module),
    2. split the data into train/test sets with the ``data_split.r`` R script,
    3. build QSAR models for every target set and collect their stats,
    4. prepare fingerprints for ``example_set.csv`` and predict the ligands
       on all models,
    5. generate and read back the QAFFPs and b-QAFFPs of the ligand set.

    All directories are taken from ``cfg.DIRS``.

    Raises:
        Exception: if ``data_split.r`` exits with a non-zero status. The
            captured output of the script is printed before raising.
    """
    # Export target sets from ChEMBL
    data.export_target_sets()

    # Export Morgan2 fingerprints for target sets
    data.export_fingerprints_for_target_sets()

    # Generate train/test sets
    split_command = [
        "Rscript", "--vanilla", "data_split.r",
        cfg.DIRS["FPS"], cfg.DIRS["QSAR_SETS"],
    ]
    try:
        split_output = check_output(split_command)
    except CalledProcessError as e:
        # The error message refers to "the output above", so the captured
        # output has to be shown before the exception is raised.
        _print_process_output(e.output)
        raise Exception("ERROR IN data_split.r. See the output above.")
    else:
        _print_process_output(split_output)

    # Build QSAR models
    print("\nBuild QSAR models for all target sets...")
    # File names are reduced to the part before the first dot and
    # de-duplicated; sorting makes the processing order reproducible.
    target_sets = sorted({x.split(".")[0] for x in os.listdir(cfg.DIRS["TARGET_SETS"])})
    count = len(target_sets)

    for i, target_set in enumerate(target_sets, 1):
        print("{}/{}".format(i, count))
        predictions.build_qsar_models(target_set)

    # Get QSAR models stats
    data.get_qsar_models_stats()

    # Export Morgan2 fingerprints for a ligand set
    ligand_set_csv = os.path.join(cfg.DIRS["LIGAND_SETS"], "example_set.csv")
    ligand_set_fps = os.path.join(cfg.DIRS["LIGAND_SETS"], "example_set.fps")
    utils.prepare_ligand_set_from_set_file(
        input_file=ligand_set_csv,
        output_file=ligand_set_fps
    )

    # Predict the ligand set on all models
    predictions.predict_ligands_on_all_models(
        ligands_file=ligand_set_fps,
        r20_cutoff=0.6,
        q2_cutoff=0.5
    )

    # Generate QAFFPs for the ligand set
    qaffps_lib.generate_qaffps(ligand_set_name="example_set", confidence=90, max_dev=2)

    # Get QAFFPs for the ligand set
    # (the results below are not used further; the calls show how to load them)
    qaffps = qaffps_lib.get_qaffps(ligand_set_name="example_set")

    # Get b-QAFFPs for the ligand set
    bqaffps = qaffps_lib.get_bqaffps(ligand_set_name="example_set", cutoff=5)
    
# Start the workflow only when the file is executed as a script.
if "__main__" == __name__:
    run()

skutac/QAFFPs

QAFFPs