apache/arrow-julia

Issue with `Union{Missing, VersionNumber}`

ericphanson opened this issue · 6 comments

struct A
    v::Union{Missing, VersionNumber}
end
table = [A(v"1"), A(missing)]

gives

julia> Arrow.tobuffer(table)
ERROR: ArgumentError: type does not have a definite number of fields
Stacktrace:
  [1] fieldcount(t::Any)
    @ Base ./reflection.jl:814
  [2] default(#unused#::Type{Tuple{Vararg{Union{UInt64, String}}}})
    @ ArrowTypes ~/.julia/packages/ArrowTypes/aTFES/src/ArrowTypes.jl:352
  [3] getindex
    @ ~/.julia/packages/Arrow/rYdxZ/src/arraytypes/struct.jl:80 [inlined]
  [4] ArrowTypes.ToArrow(x::Arrow.ToStruct{Tuple{Vararg{Union{UInt64, String}}}, 4, Vector{Union{Missing, VersionNumber}}})
    @ ArrowTypes ~/.julia/packages/ArrowTypes/aTFES/src/ArrowTypes.jl:381
  [5] arrowvector(x::Arrow.ToStruct{Tuple{Vararg{Union{UInt64, String}}}, 4, Vector{Union{Missing, VersionNumber}}}, i::Int64, nl::Int64, fi::Int64, de::Dict{Int64, Any}, ded::Vector{Arrow.DictEncoding}, meta::Nothing; dictencoding::Bool, dictencode::Bool, maxdepth::Int64, kw::Base.Pairs{Symbol, Union{Nothing, Bool}, NTuple{4, Symbol}, NamedTuple{(:compression, :largelists, :denseunions, :dictencodenested), Tuple{Nothing, Bool, Bool, Bool}}})
    @ Arrow ~/.julia/packages/Arrow/rYdxZ/src/arraytypes/arraytypes.jl:62
  [6] (::Arrow.var"#49#50"{Base.Pairs{Symbol, Union{Nothing, Integer}, NTuple{6, Symbol}, NamedTuple{(:dictencode, :maxdepth, :compression, :largelists, :denseunions, :dictencodenested), Tuple{Bool, Int64, Nothing, Bool, Bool, Bool}}}, Vector{Union{Missing, VersionNumber}}, Int64, Int64, Dict{Int64, Any}, Vector{Arrow.DictEncoding}})(j::Int64)
    @ Arrow ./none:0
  [7] iterate
    @ ./generator.jl:47 [inlined]
  [8] collect_to!(dest::Vector{Arrow.Primitive{UInt32}}, itr::Base.Generator{UnitRange{Int64}, Arrow.var"#49#50"{Base.Pairs{Symbol, Union{Nothing, Integer}, NTuple{6, Symbol}, NamedTuple{(:dictencode, :maxdepth, :compression, :largelists, :denseunions, :dictencodenested), Tuple{Bool, Int64, Nothing, Bool, Bool, Bool}}}, Vector{Union{Missing, VersionNumber}}, Int64, Int64, Dict{Int64, Any}, Vector{Arrow.DictEncoding}}}, offs::Int64, st::Int64)
    @ Base ./array.jl:840
  [9] collect_to!(dest::Vector{Arrow.Primitive{UInt32, Arrow.ToStruct{UInt32, 1, Vector{Union{Missing, VersionNumber}}}}}, itr::Base.Generator{UnitRange{Int64}, Arrow.var"#49#50"{Base.Pairs{Symbol, Union{Nothing, Integer}, NTuple{6, Symbol}, NamedTuple{(:dictencode, :maxdepth, :compression, :largelists, :denseunions, :dictencodenested), Tuple{Bool, Int64, Nothing, Bool, Bool, Bool}}}, Vector{Union{Missing, VersionNumber}}, Int64, Int64, Dict{Int64, Any}, Vector{Arrow.DictEncoding}}}, offs::Int64, st::Int64)
    @ Base ./array.jl:848
 [10] collect_to_with_first!(dest::Vector{Arrow.Primitive{UInt32, Arrow.ToStruct{UInt32, 1, Vector{Union{Missing, VersionNumber}}}}}, v1::Arrow.Primitive{UInt32, Arrow.ToStruct{UInt32, 1, Vector{Union{Missing, VersionNumber}}}}, itr::Base.Generator{UnitRange{Int64}, Arrow.var"#49#50"{Base.Pairs{Symbol, Union{Nothing, Integer}, NTuple{6, Symbol}, NamedTuple{(:dictencode, :maxdepth, :compression, :largelists, :denseunions, :dictencodenested), Tuple{Bool, Int64, Nothing, Bool, Bool, Bool}}}, Vector{Union{Missing, VersionNumber}}, Int64, Int64, Dict{Int64, Any}, Vector{Arrow.DictEncoding}}}, st::Int64)
    @ Base ./array.jl:818
 [11] collect(itr::Base.Generator{UnitRange{Int64}, Arrow.var"#49#50"{Base.Pairs{Symbol, Union{Nothing, Integer}, NTuple{6, Symbol}, NamedTuple{(:dictencode, :maxdepth, :compression, :largelists, :denseunions, :dictencodenested), Tuple{Bool, Int64, Nothing, Bool, Bool, Bool}}}, Vector{Union{Missing, VersionNumber}}, Int64, Int64, Dict{Int64, Any}, Vector{Arrow.DictEncoding}}})
    @ Base ./array.jl:792
 [12] _totuple
    @ ./tuple.jl:401 [inlined]
 [13] Tuple
    @ ./tuple.jl:369 [inlined]
 [14] arrowvector(::ArrowTypes.StructKind, x::Vector{Union{Missing, VersionNumber}}, i::Int64, nl::Int64, fi::Int64, de::Dict{Int64, Any}, ded::Vector{Arrow.DictEncoding}, meta::Base.ImmutableDict{String, String}; kw::Base.Pairs{Symbol, Union{Nothing, Integer}, NTuple{6, Symbol}, NamedTuple{(:dictencode, :maxdepth, :compression, :largelists, :denseunions, :dictencodenested), Tuple{Bool, Int64, Nothing, Bool, Bool, Bool}}})
    @ Arrow ~/.julia/packages/Arrow/rYdxZ/src/arraytypes/struct.jl:93
 [15] arrowvector
    @ ~/.julia/packages/Arrow/rYdxZ/src/arraytypes/struct.jl:89 [inlined]
 [16] arrowvector(::Type{VersionNumber}, x::Vector{Union{Missing, VersionNumber}}, i::Int64, nl::Int64, fi::Int64, de::Dict{Int64, Any}, ded::Vector{Arrow.DictEncoding}, meta::Base.ImmutableDict{String, String}; kw::Base.Pairs{Symbol, Union{Nothing, Integer}, NTuple{6, Symbol}, NamedTuple{(:dictencode, :maxdepth, :compression, :largelists, :denseunions, :dictencodenested), Tuple{Bool, Int64, Nothing, Bool, Bool, Bool}}})
    @ Arrow ~/.julia/packages/Arrow/rYdxZ/src/arraytypes/arraytypes.jl:91
 [17] arrowvector(x::Vector{Union{Missing, VersionNumber}}, i::Int64, nl::Int64, fi::Int64, de::Dict{Int64, Any}, ded::Vector{Arrow.DictEncoding}, meta::Nothing; dictencoding::Bool, dictencode::Bool, maxdepth::Int64, kw::Base.Pairs{Symbol, Union{Nothing, Bool}, NTuple{4, Symbol}, NamedTuple{(:compression, :largelists, :denseunions, :dictencodenested), Tuple{Nothing, Bool, Bool, Bool}}})
    @ Arrow ~/.julia/packages/Arrow/rYdxZ/src/arraytypes/arraytypes.jl:68
 [18] arrowvector
    @ ~/.julia/packages/Arrow/rYdxZ/src/arraytypes/arraytypes.jl:52 [inlined]
 [19] toarrowvector(x::Vector{Union{Missing, VersionNumber}}, i::Int64, de::Dict{Int64, Any}, ded::Vector{Arrow.DictEncoding}, meta::Nothing; compression::Nothing, kw::Base.Pairs{Symbol, Integer, NTuple{5, Symbol}, NamedTuple{(:largelists, :denseunions, :dictencode, :dictencodenested, :maxdepth), Tuple{Bool, Bool, Bool, Bool, Int64}}})
    @ Arrow ~/.julia/packages/Arrow/rYdxZ/src/arraytypes/arraytypes.jl:37
 [20] toarrowvector
    @ ~/.julia/packages/Arrow/rYdxZ/src/arraytypes/arraytypes.jl:34 [inlined]
 [21] (::Arrow.var"#145#146"{Dict{Int64, Any}, Bool, Nothing, Bool, Bool, Bool, Int64, Nothing, Vector{Arrow.DictEncoding}, Vector{Type}, Vector{Any}})(col::Vector{Union{Missing, VersionNumber}}, i::Int64, nm::Symbol)
    @ Arrow ~/.julia/packages/Arrow/rYdxZ/src/write.jl:337
 [22] eachcolumn
    @ ~/.julia/packages/Tables/AcRIE/src/utils.jl:70 [inlined]
 [23] toarrowtable(cols::Tables.CopiedColumns{NamedTuple{(:v,), Tuple{Vector{Union{Missing, VersionNumber}}}}}, dictencodings::Dict{Int64, Any}, largelists::Bool, compress::Nothing, denseunions::Bool, dictencode::Bool, dictencodenested::Bool, maxdepth::Int64, meta::Nothing, colmeta::Nothing)
    @ Arrow ~/.julia/packages/Arrow/rYdxZ/src/write.jl:334
 [24] macro expansion
    @ ~/.julia/packages/Arrow/rYdxZ/src/write.jl:195 [inlined]
 [25] macro expansion
    @ ./task.jl:476 [inlined]
 [26] write(writer::Arrow.Writer{IOBuffer}, source::Vector{A})
    @ Arrow ~/.julia/packages/Arrow/rYdxZ/src/write.jl:185
 [27] (::Arrow.var"#137#138"{Vector{A}})(writer::Arrow.Writer{IOBuffer})
    @ Arrow ~/.julia/packages/Arrow/rYdxZ/src/write.jl:283
 [28] open(::Arrow.var"#137#138"{Vector{A}}, ::Type, ::Vararg{Any}; kwargs::Base.Pairs{Symbol, Bool, Tuple{Symbol}, NamedTuple{(:file,), Tuple{Bool}}})
    @ Base ./io.jl:395
 [29] open
    @ ./io.jl:392 [inlined]
 [30] #write#136
    @ ~/.julia/packages/Arrow/rYdxZ/src/write.jl:282 [inlined]
 [31] write
    @ ~/.julia/packages/Arrow/rYdxZ/src/write.jl:281 [inlined]
 [32] tobuffer(data::Vector{A}; kwargs::Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
    @ Arrow ~/.julia/packages/Arrow/rYdxZ/src/utils.jl:132
 [33] tobuffer(data::Vector{A})
    @ Arrow ~/.julia/packages/Arrow/rYdxZ/src/utils.jl:130
 [34] top-level scope
    @ REPL[30]:1
 [35] top-level scope
    @ ~/.julia/juliaup/julia-1.9.1+0.x64.apple.darwin14/share/julia/stdlib/v1.9/REPL/src/REPL.jl:1416

This happens on Arrow 2.5 and 2.6, with both Julia 1.8 and 1.9.

This is OK if I replace VersionNumber with String, for example:

struct B
    v::Union{Missing, String}
end
table = [B("1"), B(missing)]

gives

julia> Arrow.tobuffer(table)
IOBuffer(data=UInt8[...], readable=true, writable=true, seekable=true, append=false, size=320, maxsize=Inf, ptr=1, mark=-1)

as expected

I'm not sure `VersionNumber` itself is the problem, though, as a plain `VersionNumber` field (without `Missing`) works:

julia> struct C
       v::VersionNumber
       end

julia> table = [C(v"1")]
1-element Vector{C}:
 C(v"1.0.0")

julia> Arrow.tobuffer(table)
IOBuffer(data=UInt8[...], readable=true, writable=true, seekable=true, append=false, size=1144, maxsize=Inf, ptr=1, mark=-1)

and e.g. fieldcount(VersionNumber) works fine.

quinnj commented

Do we know if this used to work and regressed? Or perhaps just never worked?

I’m not sure. I’ve definitely serialized `VersionNumber`s before, but maybe not together with `missing`. This came up in a new application; it didn’t break a working one.

quinnj commented

OK, a fix is up: #465. I believe this has always been an issue. It's kind of a gnarly one, but I think the proposed solution should be robust for structs.

quinnj commented

Alternative fix is up: #466