YotpoLtd/metorikku

Generic delimited file input support

Closed this issue · 2 comments

Rather than only loading CSV file inputs, it would be nice to also load pipe (|) delimited, tab-delimited, and ASCII 0x1F (or any other specific ASCII code) delimited files.

Once #149 is merged, you will be able to add the option `delimiter: \t` to your input configuration.

delimiter: ,

gives the following error:

2019-04-02 07:57:05,926 [main] INFO com.yotpo.metorikku.configuration.job.ConfigurationParser$ - Starting Metorikku - Parsing configuration
Exception in thread "main" com.fasterxml.jackson.databind.JsonMappingException: while parsing a block node
in 'reader', line 15, column 20:
delimiter: ,
^
expected the node content, but found FlowEntry
in 'reader', line 15, column 20:
delimiter: ,
^
(through reference chain: com.yotpo.metorikku.configuration.job.Configuration["inputs"]->com.fasterxml.jackson.module.scala.deser.MapBuilderWrapper["ddddata"]->com.yotpo.metorikku.configuration.job.Input["file"]->com.yotpo.metorikku.configuration.job.input.File["options"])
at com.fasterxml.jackson.databind.JsonMappingException.wrapWithPath(JsonMappingException.java:210)
at com.fasterxml.jackson.databind.JsonMappingException.wrapWithPath(JsonMappingException.java:177)
at com.fasterxml.jackson.databind.deser.BeanDeserializerBase.wrapAndThrow(BeanDeserializerBase.java:1474)
at com.fasterxml.jackson.databind.deser.BeanDeserializer._deserializeWithErrorWrapping(BeanDeserializer.java:465)
at com.fasterxml.jackson.databind.deser.BeanDeserializer._deserializeUsingPropertyBased(BeanDeserializer.java:379)
at com.fasterxml.jackson.databind.deser.BeanDeserializerBase.deserializeFromObjectUsingNonDefault(BeanDeserializerBase.java:1099)
at com.fasterxml.jackson.databind.deser.BeanDeserializer.deserializeFromObject(BeanDeserializer.java:296)
at com.fasterxml.jackson.databind.deser.BeanDeserializer.deserialize(BeanDeserializer.java:133)
at com.fasterxml.jackson.module.scala.deser.OptionDeserializer$$anonfun$deserialize$1.apply(OptionDeserializerModule.scala:50)
at com.fasterxml.jackson.module.scala.deser.OptionDeserializer$$anonfun$deserialize$1.apply(OptionDeserializerModule.scala:50)
at scala.Option.map(Option.scala:146)
at com.fasterxml.jackson.module.scala.deser.OptionDeserializer.deserialize(OptionDeserializerModule.scala:50)
at com.fasterxml.jackson.module.scala.deser.OptionDeserializer.deserialize(OptionDeserializerModule.scala:11)
at com.fasterxml.jackson.databind.deser.SettableBeanProperty.deserialize(SettableBeanProperty.java:520)
at com.fasterxml.jackson.databind.deser.BeanDeserializer._deserializeWithErrorWrapping(BeanDeserializer.java:463)
at com.fasterxml.jackson.databind.deser.BeanDeserializer._deserializeUsingPropertyBased(BeanDeserializer.java:379)
at com.fasterxml.jackson.databind.deser.BeanDeserializerBase.deserializeFromObjectUsingNonDefault(BeanDeserializerBase.java:1099)
at com.fasterxml.jackson.databind.deser.BeanDeserializer.deserializeFromObject(BeanDeserializer.java:296)
at com.fasterxml.jackson.databind.deser.BeanDeserializer.deserialize(BeanDeserializer.java:133)
at com.fasterxml.jackson.databind.deser.std.MapDeserializer._readAndBindStringMap(MapDeserializer.java:495)
at com.fasterxml.jackson.databind.deser.std.MapDeserializer.deserialize(MapDeserializer.java:341)
at com.fasterxml.jackson.module.scala.deser.UnsortedMapDeserializer.deserialize(UnsortedMapDeserializerModule.scala:76)
at com.fasterxml.jackson.module.scala.deser.UnsortedMapDeserializer.deserialize(UnsortedMapDeserializerModule.scala:39)
at com.fasterxml.jackson.module.scala.deser.OptionDeserializer$$anonfun$deserialize$1.apply(OptionDeserializerModule.scala:50)
at com.fasterxml.jackson.module.scala.deser.OptionDeserializer$$anonfun$deserialize$1.apply(OptionDeserializerModule.scala:50)
at scala.Option.map(Option.scala:146)
at com.fasterxml.jackson.module.scala.deser.OptionDeserializer.deserialize(OptionDeserializerModule.scala:50)
at com.fasterxml.jackson.module.scala.deser.OptionDeserializer.deserialize(OptionDeserializerModule.scala:11)
at com.fasterxml.jackson.databind.deser.SettableBeanProperty.deserialize(SettableBeanProperty.java:520)
at com.fasterxml.jackson.databind.deser.BeanDeserializer._deserializeWithErrorWrapping(BeanDeserializer.java:463)
at com.fasterxml.jackson.databind.deser.BeanDeserializer._deserializeUsingPropertyBased(BeanDeserializer.java:379)
at com.fasterxml.jackson.databind.deser.BeanDeserializerBase.deserializeFromObjectUsingNonDefault(BeanDeserializerBase.java:1099)
at com.fasterxml.jackson.databind.deser.BeanDeserializer.deserializeFromObject(BeanDeserializer.java:296)
at com.fasterxml.jackson.databind.deser.BeanDeserializer.deserialize(BeanDeserializer.java:133)
at com.fasterxml.jackson.databind.ObjectMapper._readMapAndClose(ObjectMapper.java:3736)
at com.fasterxml.jackson.databind.ObjectMapper.readValue(ObjectMapper.java:2726)
at com.yotpo.metorikku.configuration.job.ConfigurationParser$.parseConfigurationFile(ConfigurationParser.scala:46)
at com.yotpo.metorikku.configuration.job.ConfigurationParser$.parse(ConfigurationParser.scala:37)
at com.yotpo.metorikku.Metorikku$.delayedEndpoint$com$yotpo$metorikku$Metorikku$1(Metorikku.scala:10)
at com.yotpo.metorikku.Metorikku$delayedInit$body.apply(Metorikku.scala:7)
at scala.Function0$class.apply$mcV$sp(Function0.scala:34)
at scala.runtime.AbstractFunction0.apply$mcV$sp(AbstractFunction0.scala:12)
at scala.App$$anonfun$main$1.apply(App.scala:76)
at scala.App$$anonfun$main$1.apply(App.scala:76)
at scala.collection.immutable.List.foreach(List.scala:381)
at scala.collection.generic.TraversableForwarder$class.foreach(TraversableForwarder.scala:35)
at scala.App$class.main(App.scala:76)
at com.yotpo.metorikku.Metorikku$.main(Metorikku.scala:7)
at com.yotpo.metorikku.Metorikku.main(Metorikku.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)
at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:894)
at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:198)
at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:228)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:137)
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
Caused by: while parsing a block node
in 'reader', line 15, column 20:
delimiter: ,
^
expected the node content, but found FlowEntry
in 'reader', line 15, column 20:
delimiter: ,
^

    at com.fasterxml.jackson.dataformat.yaml.snakeyaml.parser.ParserImpl.parseNode(ParserImpl.java:480)
    at com.fasterxml.jackson.dataformat.yaml.snakeyaml.parser.ParserImpl.parseBlockNodeOrIndentlessSequence(ParserImpl.java:368)
    at com.fasterxml.jackson.dataformat.yaml.snakeyaml.parser.ParserImpl.access$2300(ParserImpl.java:116)
    at com.fasterxml.jackson.dataformat.yaml.snakeyaml.parser.ParserImpl$ParseBlockMappingValue.produce(ParserImpl.java:588)
    at com.fasterxml.jackson.dataformat.yaml.snakeyaml.parser.ParserImpl.peekEvent(ParserImpl.java:158)
    at com.fasterxml.jackson.dataformat.yaml.snakeyaml.parser.ParserImpl.getEvent(ParserImpl.java:168)
    at com.fasterxml.jackson.dataformat.yaml.YAMLParser.nextToken(YAMLParser.java:342)
    at com.fasterxml.jackson.databind.deser.std.MapDeserializer._readAndBindStringMap(MapDeserializer.java:484)
    at com.fasterxml.jackson.databind.deser.std.MapDeserializer.deserialize(MapDeserializer.java:341)
    at com.fasterxml.jackson.module.scala.deser.UnsortedMapDeserializer.deserialize(UnsortedMapDeserializerModule.scala:76)
    at com.fasterxml.jackson.module.scala.deser.UnsortedMapDeserializer.deserialize(UnsortedMapDeserializerModule.scala:39)
    at com.fasterxml.jackson.module.scala.deser.OptionDeserializer$$anonfun$deserialize$1.apply(OptionDeserializerModule.scala:50)
    at com.fasterxml.jackson.module.scala.deser.OptionDeserializer$$anonfun$deserialize$1.apply(OptionDeserializerModule.scala:50)
    at scala.Option.map(Option.scala:146)
    at com.fasterxml.jackson.module.scala.deser.OptionDeserializer.deserialize(OptionDeserializerModule.scala:50)
    at com.fasterxml.jackson.module.scala.deser.OptionDeserializer.deserialize(OptionDeserializerModule.scala:11)
    at com.fasterxml.jackson.databind.deser.SettableBeanProperty.deserialize(SettableBeanProperty.java:520)
    at com.fasterxml.jackson.databind.deser.BeanDeserializer._deserializeWithErrorWrapping(BeanDeserializer.java:463)
    ... 55 more