ziqizhang/jate

NullPointerException when generating n-grams from empty content

Closed this issue · 1 comment

It happens when performing candidate extraction and indexing the document 'C02-1055' from ACL RD-TEC 1.0.

Caused by: java.lang.NullPointerException
	at org.apache.lucene.analysis.jate.ComplexShingleFilter.incrementToken(ComplexShingleFilter.java:234)
	at org.apache.lucene.analysis.core.LowerCaseFilter.incrementToken(LowerCaseFilter.java:45)
	at org.apache.lucene.analysis.jate.EnglishLemmatisationFilter.incrementToken(EnglishLemmatisationFilter.java:30)
	at org.apache.lucene.analysis.util.FilteringTokenFilter.incrementToken(FilteringTokenFilter.java:51)
	at org.apache.lucene.index.DefaultIndexingChain$PerField.invert(DefaultIndexingChain.java:613)
	at org.apache.lucene.index.DefaultIndexingChain.processField(DefaultIndexingChain.java:344)
	at org.apache.lucene.index.DefaultIndexingChain.processDocument(DefaultIndexingChain.java:300)
	at org.apache.lucene.index.DocumentsWriterPerThread.updateDocument(DocumentsWriterPerThread.java:234)
	at org.apache.lucene.index.DocumentsWriter.updateDocument(DocumentsWriter.java:450)
	at org.apache.lucene.index.IndexWriter.updateDocument(IndexWriter.java:1475)
	at org.apache.solr.update.DirectUpdateHandler2.addDoc0(DirectUpdateHandler2.java:239)
	at org.apache.solr.update.DirectUpdateHandler2.addDoc(DirectUpdateHandler2.java:163)

'C02-1055' content:

<?xml version="1.0" standalone="yes"?>

<Paper id="C02-1055">
  <Title>amp;quot;</Title>
  <Keywords></Keywords>
  <Abbreviations></Abbreviations>
  <Authors></Authors>
  <References></References>
</Paper>

Fixed in beta.7.