/PSPG_245B

Primary Language: Jupyter Notebook

Instance

Graph Stats

Find number of each node type in graph

// Tally nodes per label, most frequent label first.
// Only the first entry of labels() is used, so a node carrying several
// labels is counted once, under that first label.
MATCH (n)
RETURN labels(n)[0] AS label, count(*) AS count
ORDER BY count DESC

Find number of each edge type in graph

// Tally relationships per type, most frequent type first.
MATCH ()-[r]->()
RETURN type(r) AS rel_type, count(*) AS count
ORDER BY count DESC

Get a random example of each edge type (with its start and end nodes)

// Group every relationship by its type, pick one member of each group at
// random, and return it together with its two endpoint nodes.
MATCH ()-[r]->()
WITH type(r) AS edge_type, collect(r) AS same_type_rels
WITH same_type_rels[toInteger(rand() * size(same_type_rels))] AS rel
RETURN startNode(rel), rel, endNode(rel)

Multiple Sclerosis (MS) examples

Find Disease node for MS

// Disease nodes whose name contains "Multiple Sclerosis" (case-insensitive regex).
MATCH (d:Disease)
WHERE d.name =~ '(?i).*Multiple Sclerosis.*'
RETURN d

Find Compounds that treat MS

// Paths joining a Compound to an MS Disease node through a TREATS_CtD
// relationship; the relationship is matched in either direction.
MATCH compound_for_ms = (c:Compound)-[:TREATS_CtD]-(d:Disease)
WHERE d.name =~ '(?i).*Multiple Sclerosis.*'
RETURN compound_for_ms

Find Genes that bind Compounds that treat MS

// Compound - Gene - Disease paths where the Compound treats the MS Disease
// node, binds the Gene, and the Gene is linked to that same Disease node by
// a relationship of any type.
MATCH (c:Compound)-[:TREATS_CtD]-(d:Disease)
WHERE d.name =~ '(?i).*Multiple Sclerosis.*'
MATCH ms_genes_with_compound = (c)-[:BINDS_CbG]-(g:Gene)-[]-(d)
RETURN ms_genes_with_compound

Find Molecular Functions (GeneOntology) for the Genes that bind Compounds that treat MS

// Same Compound - Gene - Disease paths as the previous MS query, extended
// with every MolecularFunction the Gene is attached to via PARTICIPATES_GpMF.
MATCH (c:Compound)-[:TREATS_CtD]-(d:Disease)
WHERE d.name =~ '(?i).*Multiple Sclerosis.*'
MATCH ms_genes_with_compound = (c)-[:BINDS_CbG]-(g:Gene)-[]-(d)
MATCH mf = (g)-[:PARTICIPATES_GpMF]-(:MolecularFunction)
RETURN ms_genes_with_compound, mf

Myelin examples

Find all GeneOntology nodes involving Myelin

// Collect the Myelin-related nodes of each Gene Ontology label independently.
// Three plain MATCH clauses on disconnected patterns would pair every
// CellularComponent with every MolecularFunction and every BiologicalProcess
// (a cartesian product), and would return no rows at all if any one label had
// no match. OPTIONAL MATCH + collect() instead yields a single row whose
// columns cc, m and b are lists of the matching nodes (possibly empty).
OPTIONAL MATCH (cc_node:CellularComponent)
WHERE cc_node.name =~ '(?i).*Myelin.*'
WITH collect(cc_node) AS cc
OPTIONAL MATCH (m_node:MolecularFunction)
WHERE m_node.name =~ '(?i).*Myelin.*'
WITH cc, collect(m_node) AS m
OPTIONAL MATCH (b_node:BiologicalProcess)
WHERE b_node.name =~ '(?i).*Myelin.*'
RETURN cc, m, collect(b_node) AS b

Find Compounds that Bind Genes that Participate in GeneOntology nodes involving Myelin

// Compounds linked, through Gene nodes, to a Myelin-named node of all three
// Gene Ontology labels at once. Only the first pattern (the BINDS_CbG /
// PARTICIPATES_GpCC chain) is bound to the returned path variable; the other
// two patterns act purely as filters on the Compound.
MATCH
  compounds_for_myelin_genes = (c:Compound)-[:BINDS_CbG]-(:Gene)-[:PARTICIPATES_GpCC]-(cc:CellularComponent),
  (c)-[]-(:Gene)-[:PARTICIPATES_GpMF]-(m:MolecularFunction),
  (c)-[]-(:Gene)-[:PARTICIPATES_GpBP]-(b:BiologicalProcess)
WHERE cc.name =~ '(?i).*Myelin.*'
  AND m.name =~ '(?i).*Myelin.*'
  AND b.name =~ '(?i).*Myelin.*'
RETURN compounds_for_myelin_genes

Find Diseases Treated by Compounds that Bind Genes that Participate in GeneOntology nodes involving Myelin

// Start from Compounds linked, through Gene nodes, to a Myelin-named node of
// all three Gene Ontology labels, then add every Disease each such Compound
// is attached to via TREATS_CtD. Only the first pattern of the comma-separated
// MATCH is bound to compounds_for_myelin_genes.
MATCH
  compounds_for_myelin_genes = (c:Compound)-[:BINDS_CbG]-(:Gene)-[:PARTICIPATES_GpCC]-(cc:CellularComponent),
  (c)-[]-(:Gene)-[:PARTICIPATES_GpMF]-(m:MolecularFunction),
  (c)-[]-(:Gene)-[:PARTICIPATES_GpBP]-(b:BiologicalProcess)
WHERE cc.name =~ '(?i).*Myelin.*'
  AND m.name =~ '(?i).*Myelin.*'
  AND b.name =~ '(?i).*Myelin.*'
MATCH disease_compounds_treat = (c)-[:TREATS_CtD]-(:Disease)
RETURN compounds_for_myelin_genes, cc, m, b, disease_compounds_treat

DWPC example

Find DWPC for meta path DaGiGpBP between MS and GeneOntology BiologicalProcess nodes

// Path count (PC) and degree-weighted path count (DWPC) from the
// 'multiple sclerosis' Disease node to each BiologicalProcess node along
// Disease -ASSOCIATES_DaG- n1 -INTERACTS_GiG- n2 -PARTICIPATES_GpBP- BiologicalProcess.
// NOTE(review): size(<pattern>) and exists(<pattern>) are version-sensitive
// Cypher forms - confirm they are accepted by the target Neo4j server.
MATCH path = (n0:Disease)-[e1:ASSOCIATES_DaG]-(n1)-[:INTERACTS_GiG]-(n2)-[:PARTICIPATES_GpBP]-(n3:BiologicalProcess)
WHERE n0.name = 'multiple sclerosis'
// Keep only associations whose `sources` list contains 'GWAS Catalog'.
AND 'GWAS Catalog' in e1.sources
// Require n2 to also be reachable from the disease through a
// LOCALIZES_DlA relationship followed by an UPREGULATES_AuG relationship.
AND exists((n0)-[:LOCALIZES_DlA]-()-[:UPREGULATES_AuG]-(n2))
// For each relationship on the path, record the degree of both of its
// endpoints counted over that relationship type only (six values per path).
WITH
[
size((n0)-[:ASSOCIATES_DaG]-()),
size(()-[:ASSOCIATES_DaG]-(n1)),
size((n1)-[:INTERACTS_GiG]-()),
size(()-[:INTERACTS_GiG]-(n2)),
size((n2)-[:PARTICIPATES_GpBP]-()),
size(()-[:PARTICIPATES_GpBP]-(n3))
] AS degrees, path, n3 as target
// Aggregate per target node: PC is the number of paths; DWPC sums, over
// those paths, the product of every degree raised to the power -0.7.
// n_genes is the target's PARTICIPATES_GpBP degree.
WITH
target.identifier AS go_id,
target.name AS go_name,
count(path) AS PC,
sum(reduce(pdp = 1.0, d in degrees| pdp * d ^ -0.7)) AS DWPC,
size((target)-[:PARTICIPATES_GpBP]-()) AS n_genes
// Keep targets with 5 to 100 PARTICIPATES_GpBP neighbours and at least 2 paths.
WHERE 5 <= n_genes <= 100 AND PC >= 2
RETURN
go_id, go_name, PC, DWPC, n_genes
// Report the five targets with the highest DWPC.
ORDER BY DWPC DESC
LIMIT 5

MS example workshop

Epilepsy example workshop

Other examples

# Disease selection for migraine: an identifier string plus a short name.
# NOTE(review): the "DOID:" prefix suggests a Disease Ontology ID - confirm
# against the code that consumes these two settings.
DISEASE = 'DOID:6364'
DISEASE_NAME = "migraine"

# Disease selection for nicotine dependence: an identifier string plus a short name.
# NOTE(review): the "DOID:" prefix suggests a Disease Ontology ID - confirm
# against the code that consumes these two settings.
DISEASE = 'DOID:0050742'
DISEASE_NAME = "nicotine_dependence"

For breast cancer please use:

# Disease selection for breast cancer: an identifier string plus a short name.
# NOTE(review): the "DOID:" prefix suggests a Disease Ontology ID - confirm
# against the code that consumes these two settings.
DISEASE = 'DOID:1612'
DISEASE_NAME = "breast_cancer"