aberHRML/classyfireR

Some InChIKeys give errors

Closed this issue · 4 comments

Examples:

bad_key <- 
c("WCYWZMWISLQXQU-UHFFFAOYSA-N",
"WCYWZMWISLQXQU-UHFFFAOYSA-N",
"AKGGYBADQZYZPD-UHFFFAOYSA-N",
"MDHYEMXUFSJLGV-UHFFFAOYSA-N")

What was the error you got?

I just tried these and got no errors:

> library(magrittr)
> library(classyfireR)
> bad_key <- 
c("WCYWZMWISLQXQU-UHFFFAOYSA-N",
"WCYWZMWISLQXQU-UHFFFAOYSA-N",
"AKGGYBADQZYZPD-UHFFFAOYSA-N",
"MDHYEMXUFSJLGV-UHFFFAOYSA-N")

> purrr:::map(bad_key, entity_classification) %>% purrr:::map(., ~{.[[1]]})

[[1]]
                  Classification           CHEMONT
Kingdom        Organic compounds CHEMONTID:0000000
Superlcas           Hydrocarbons CHEMONTID:0002837
Class     Saturated hydrocarbons CHEMONTID:0004474

[[2]]
                  Classification           CHEMONT
Kingdom        Organic compounds CHEMONTID:0000000
Superlcas           Hydrocarbons CHEMONTID:0002837
Class     Saturated hydrocarbons CHEMONTID:0004474

[[3]]
                               Classification           CHEMONT
Kingdom                     Organic compounds CHEMONTID:0000000
Superlcas                          Benzenoids CHEMONTID:0002448
Class     Benzene and substituted derivatives CHEMONTID:0002279

[[4]]
                               Classification           CHEMONT
Kingdom                     Organic compounds CHEMONTID:0000000
Superlcas                          Benzenoids CHEMONTID:0002448
Class     Benzene and substituted derivatives CHEMONTID:0002279

That's strange that it works for you.
I get:

Error: Columns `Classification`, `CHEMONT` must be length 1 or 4, not 3, 3

`$subclass` in `json_res` is NULL, which seems to be the problem:

$smiles
[1] "[CH3]"

$inchikey
[1] "InChIKey=WCYWZMWISLQXQU-UHFFFAOYSA-N"

$kingdom
$kingdom$name
[1] "Organic compounds"

$kingdom$description
[1] "Compounds that contain at least one carbon atom, excluding isocyanide/cyanide and their non-hydrocarbyl derivatives, thiophosgene, carbon diselenide, carbon monosulfide, carbon disulfide, carbon subsulfide, carbon monoxide, carbon dioxide, Carbon suboxide, and dicarbon monoxide."

$kingdom$chemont_id
[1] "CHEMONTID:0000000"

$kingdom$url
[1] "http://classyfire.wishartlab.com/tax_nodes/C0000000"


$superclass
$superclass$name
[1] "Hydrocarbons"

$superclass$description
[1] "Organic compounds made up only of carbon and hydrogen atoms."

$superclass$chemont_id
[1] "CHEMONTID:0002837"

$superclass$url
[1] "http://classyfire.wishartlab.com/tax_nodes/C0002837"


$class
$class$name
[1] "Saturated hydrocarbons"

$class$description
[1] "Hydrocarbons that contains only saturated carbon atoms, which are linked to one another through single bonds. These includes alkanes and cycloalkanes."

$class$chemont_id
[1] "CHEMONTID:0004474"

$class$url
[1] "http://classyfire.wishartlab.com/tax_nodes/C0004474"


$subclass
NULL

$intermediate_nodes
list()

$direct_parent
$direct_parent$name
[1] "Saturated hydrocarbons"

$direct_parent$description
[1] "Hydrocarbons that contains only saturated carbon atoms, which are linked to one another through single bonds. These includes alkanes and cycloalkanes."

$direct_parent$chemont_id
[1] "CHEMONTID:0004474"

$direct_parent$url
[1] "http://classyfire.wishartlab.com/tax_nodes/C0004474"


$alternative_parents
list()

$molecular_framework
[1] "Aliphatic acyclic compounds"

$substituents
[1] "Saturated hydrocarbon"      "Aliphatic acyclic compound"

$description
[1] "This compound belongs to the class of organic compounds known as saturated hydrocarbons. These are hydrocarbons that contains only saturated carbon atoms, which are linked to one another through single bonds. These includes alkanes and cycloalkanes."

$external_descriptors
  source   source_id     annotations
1  CHEBI CHEBI:29309 organic radical

$ancestors
[1] "Chemical entities"      "Hydrocarbons"           "Organic compounds"      "Saturated hydrocarbons"

$predicted_chebi_terms
[1] "hydrocarbon (CHEBI:24632)"      "chemical entity (CHEBI:24431)"  "organic molecule (CHEBI:72695)"

$predicted_lipidmaps_terms
list()

$classification_version
[1] "2.1"

The current version (master branch) now catches missing fields and inserts NAs where appropriate. This should prevent any errors and also make the output a bit more table-friendly (see the example in the README).