WIP - open during construction, pre-alpha
built on top of nltk and scikit-learn
- provide baseline feature extraction pipelines suited for each task
- provide user facing classes for each NLP task needed in the OVOS ecosystem
- transparently load different models (model_path or external plugin)
- provide dataset loaders suited for each of those tasks per language
- provide baseline heuristic implementation for each task per language
- replaces ovos-lingua-franca
- provide baseline implementations and benchmarks using classical nltk and scikit-learn algorithms
- minimum viable implementation to ensure lang support
see scripts/training
for training scripts
postag
from ovos_classifiers.postag import OVOSPostag
p = OVOSPostag("en")
print(p.model_id)
print(p.tagset) # Universal Dependencies
print(p.postag("The brown fox jumped over the lazy dog"))
# [('The', 'DET'), ('brown', 'ADJ'), ('fox', 'NOUN'), ('jumped', 'VERB'), ('over', 'ADP'), ('the', 'DET'), ('lazy', 'ADJ'), ('dog', 'VERB')]
p = OVOSPostag("pt")
print(p.model_id)
print(p.tagset) # Universal Dependencies
print(p.postag("Ontem fui passear com o meu cão"))
# [('Ontem', ('Ontem', 'ADV')), ('fui', ('fui', 'VERB')), ('passear', ('passear', 'VERB')), ('com', ('com', 'ADP')), ('o', ('o', 'DET')), ('meu', ('meu', 'PRON')), ('cão', ('cão', 'NOUN'))]
p = OVOSPostag("nltk-brown-brown-ngram-postag")
print(p.model_id)
print(p.tagset) # brown
print(p.postag("The brown fox jumped over the lazy dog"))
# [('The', ('The', 'AT')), ('brown', ('brown', 'JJ')), ('fox', ('fox', 'NN')), ('jumped', ('jumped', 'VBD')), ('over', ('over', 'IN')), ('the', ('the', 'AT')), ('lazy', ('lazy', 'JJ')), ('dog', ('dog', 'NN'))]
p = OVOSPostag("nltk-floresta-visl-brill-postag")
print(p.model_id)
print(p.tagset) # VISL (Portuguese)
print(p.postag("Ontem fui passear com o meu cão"))
# [('Ontem', ('Ontem', 'adv')), ('fui', ('fui', 'v-fin')), ('passear', ('passear', 'v-inf')), ('com', ('com', 'prp')), ('o', ('o', 'art')), ('meu', ('meu', 'pron-det')), ('cão', ('cão', 'n'))]
utterance tags
from ovos_classifiers.utttags import OVOSUtteranceTagger
sentences = [
"The brown fox jumped over the lazy dog",
"Turn off the TV",
"Turn on the lights",
"thats amazing",
"what time is it",
"tell me about einstein"
]
p = OVOSUtteranceTagger("en")
print(p.model_id)
print(p.tagset)
print(p.tag(sentences))
# ['SENTENCE:STATEMENT' 'COMMAND:ACTION' 'COMMAND:ACTION'
# 'SENTENCE:EXCLAMATION' 'QUESTION:QUERY' 'COMMAND:ACTION']