mzkrelx/milmsearch

クローリングでエラー

Closed this issue · 0 comments

Mailman のテスト用 ML の申請を承認すると、クローリング中に以下のエラーが発生する。

2013-09-17 10:19:58,470 - [DEBUG] - from application in pool-7-thread-1
crawling start

2013-09-17 10:19:58,556 - [DEBUG] - from application in play-akka.actor.default-dispatcher-4
ml proposal has updated. [id=1, status=accepted]

2013-09-17 10:20:01,963 - [ERROR] - from application in pool-7-thread-1
Crawling Error => Forbidden code point U+001b.
org.xml.sax.SAXParseException: Forbidden code point U+001b.
at nu.validator.htmlparser.impl.Tokenizer.fatal(Tokenizer.java:1044) ~[htmlparser-1.4.jar:na]
at nu.validator.htmlparser.impl.ErrorReportingTokenizer.checkChar(ErrorReportingTokenizer.java:310) ~[htmlparser-1.4.jar:na]
at nu.validator.htmlparser.impl.Tokenizer.stateLoop(Tokenizer.java:1444) ~[htmlparser-1.4.jar:na]
at nu.validator.htmlparser.impl.Tokenizer.tokenizeBuffer(Tokenizer.java:1351) ~[htmlparser-1.4.jar:na]
at nu.validator.htmlparser.io.Driver.runStates(Driver.java:321) ~[htmlparser-1.4.jar:na]
at nu.validator.htmlparser.io.Driver.tokenize(Driver.java:216) ~[htmlparser-1.4.jar:na]
at nu.validator.htmlparser.sax.HtmlParser.tokenize(HtmlParser.java:480) ~[htmlparser-1.4.jar:na]
at nu.validator.htmlparser.sax.HtmlParser.parse(HtmlParser.java:423) ~[htmlparser-1.4.jar:na]
at utils.HTMLUtil$.toNode(HTMLUtil.scala:30) ~[milmsearch_2.10-0.2-SNAPSHOT.jar:0.2-SNAPSHOT]
at models.mailsource.crawlers.MailmanCrawler$$anonfun$crawling$1$$anonfun$apply$1.apply(MailmanCrawler.scala:64) ~[milmsearch_2.10-0.2-SNAPSHOT.jar:0.2-SNAPSHOT]
at models.mailsource.crawlers.MailmanCrawler$$anonfun$crawling$1$$anonfun$apply$1.apply(MailmanCrawler.scala:63) ~[milmsearch_2.10-0.2-SNAPSHOT.jar:0.2-SNAPSHOT]
at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:244) ~[scala-library.jar:na]
at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:244) ~[scala-library.jar:na]
at scala.collection.immutable.List.foreach(List.scala:309) ~[scala-library.jar:na]
at scala.collection.TraversableLike$class.map(TraversableLike.scala:244) ~[scala-library.jar:na]
at scala.collection.AbstractTraversable.map(Traversable.scala:105) ~[scala-library.jar:na]
at models.mailsource.crawlers.MailmanCrawler$$anonfun$crawling$1.apply(MailmanCrawler.scala:63) ~[milmsearch_2.10-0.2-SNAPSHOT.jar:0.2-SNAPSHOT]
at models.mailsource.crawlers.MailmanCrawler$$anonfun$crawling$1.apply(MailmanCrawler.scala:57) ~[milmsearch_2.10-0.2-SNAPSHOT.jar:0.2-SNAPSHOT]
at scala.collection.immutable.List.foreach(List.scala:309) ~[scala-library.jar:na]
at models.mailsource.crawlers.MailmanCrawler$.crawling(MailmanCrawler.scala:57) ~[milmsearch_2.10-0.2-SNAPSHOT.jar:0.2-SNAPSHOT]
at models.mailsource.Crawler$.crawling(Crawler.scala:30) ~[milmsearch_2.10-0.2-SNAPSHOT.jar:0.2-SNAPSHOT]
at models.MLProposal$$anonfun$judge$1$$anon$1.run(MLProposal.scala:177) [milmsearch_2.10-0.2-SNAPSHOT.jar:0.2-SNAPSHOT]
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1110) [na:1.7.0_03]
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:603) [na:1.7.0_03]
at java.lang.Thread.run(Thread.java:722) [na:1.7.0_03]

2013-09-17 10:20:01,965 - [DEBUG] - from application in pool-7-thread-1
crawling end