mthelm85/PlutoDataTable.jl

Doesn't work with categorical data

Closed this issue · 1 comment

Thanks for putting this package up. I noticed it breaks when any column is of type CategoricalArray, as is the case with the species column in this example:

RDatasets.dataset("datasets", "iris") |> PlutoDataTable.data_table

This results in a StackOverflowError:

(::JSON2.var"#3#4"{DataType})(::Array{String,1})@write.jl:11
macro expansion@write.jl:75[inlined]
#write#22(::Base.Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}}, ::typeof(JSON2.write), ::Base.GenericIOBuffer{Array{UInt8,1}}, ::CategoricalArrays.CategoricalPool{String,UInt8,CategoricalArrays.CategoricalValue{String,UInt8}})@write.jl:94
write(::Base.GenericIOBuffer{Array{UInt8,1}}, ::CategoricalArrays.CategoricalPool{String,UInt8,CategoricalArrays.CategoricalValue{String,UInt8}})@write.jl:94
macro expansion@write.jl:75[inlined]
#write#22(::Base.Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}}, ::typeof(JSON2.write), ::Base.GenericIOBuffer{Array{UInt8,1}}, ::CategoricalArrays.CategoricalValue{String,UInt8})@write.jl:94
write@write.jl:94[inlined]
#write#7(::Base.Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}}, ::typeof(JSON2.write), ::Base.GenericIOBuffer{Array{UInt8,1}}, ::Array{CategoricalArrays.CategoricalValue{String,UInt8},1})@write.jl:39
write(::Base.GenericIOBuffer{Array{UInt8,1}}, ::Array{CategoricalArrays.CategoricalValue{String,UInt8},1})@write.jl:35
macro expansion@write.jl:75[inlined]
...

One workaround I could use was to convert the categorical columns to strings:

"""Convert categorical columns to string representation"""
function cat_as_str(df)
    # Select the columns whose stored vector is a CategoricalArray.
    is_categorical(col) = df[!, col] isa DataFrames.CategoricalArray
    categorical = filter(is_categorical, names(df))

    # Map every selected column onto itself through `string`, applied row by row,
    # so the output keeps the original column names.
    stringify = DataFrames.ByRow(string)
    return DataFrames.transform(df, categorical .=> stringify .=> categorical)
end

Hi! Thanks for pointing this out. It should work now with any type that can be converted into a String : )

See commit 8770914