Is it possible to specify the "language" model?
Opened this issue · 1 comment
Hi all,
first of all, thank you for having made this wrapper available. Really useful.
Could you let me know if it is possible to specify the underlying CoreNLP language model (English, French, ...)?
From what I understand of your code, it won't be easy since you use the Simple CoreNLP API, but it should be possible. Do you have any ideas or plans to extend your code with this capability?
Regards,
Grégory
Hi, in my case I created a new function called, for example, "ner2" :)
/**
 * Spark UDF that annotates the input text with the pipeline returned by
 * `getOrCreateSentimentPipeline()` and returns the named-entity tag of every
 * token, flattened across all detected sentences in document order.
 */
def ner2 = udf { text: String =>
  // Run the shared pipeline over the raw text.
  val annotated = getOrCreateSentimentPipeline().process(text)
  // Walk sentences, then tokens, keeping only each token's NER tag.
  for {
    sent <- annotated.get(classOf[SentencesAnnotation]).asScala.toList
    tok <- sent.get(classOf[TokensAnnotation]).asScala.toList
  } yield tok.get(classOf[NamedEntityTagAnnotation])
}
/**
 * Returns the cached `sentimentPipeline`, building it on first use.
 *
 * The pipeline is configured with the tokenize, ssplit, pos, lemma and ner
 * annotators and pointed at the Spanish POS-tagger and NER model resources.
 *
 * NOTE(review): the parse.*, depparse.* and regexner.* settings refer to
 * annotators that are not in the "annotators" list; presumably they have no
 * effect here — confirm before removing them.
 */
private def getOrCreateSentimentPipeline(): StanfordCoreNLP = {
  if (sentimentPipeline == null) {
    // Configuration expressed as data; each pair becomes one property.
    val settings = Seq(
      "annotators" -> "tokenize, ssplit, pos, lemma, ner",
      "tokenize.language" -> "es",
      "tokenize.verbose" -> "true",
      "pos.model" -> "edu/stanford/nlp/models/pos-tagger/spanish/spanish-distsim.tagger",
      "ner.model" -> "edu/stanford/nlp/models/ner/spanish.ancora.distsim.s512.crf.ser.gz",
      "ner.applyNumericClassifiers" -> "false",
      "ner.useSUTime" -> "false",
      "ner.language" -> "spanish",
      "parse.model" -> "edu/stanford/nlp/models/lexparser/spanishPCFG.ser.gz",
      "depparse.model" -> "edu/stanford/nlp/models/parser/nndep/UD_Spanish.gz",
      "depparse.language" -> "spanish",
      "regexner.ignoreCase" -> "true",
      "regexner.verbose" -> "true"
    )
    val config = new Properties()
    settings.foreach { case (key, value) => config.setProperty(key, value) }
    // Cache the pipeline so later calls reuse it.
    sentimentPipeline = new StanfordCoreNLP(config)
  }
  sentimentPipeline
}