meggart/DiskArrays.jl

Index error when using mapCube and Distributed

dpabon opened this issue · 3 comments

Here is a minimal working example:

# Reproducer script: activate the project environment on the main process.
using Pkg

Pkg.activate("/Net/Groups/BGI/people/dpabon/nfdi4earth_oemc")

using Distributed
# Print this process's id together with its package depot search path.
println(myid()," ", DEPOT_PATH)

# Start 10 additional worker processes.
addprocs(10)
# The block below is evaluated on every process, so each worker activates the
# project and loads the packages itself.
@everywhere begin
using Pkg

Pkg.activate("/Net/Groups/BGI/people/dpabon/nfdi4earth_oemc")
 
using YAXArrays
using Zarr
# Placeholder function passed to `mapCube`. Its body is entirely commented
# out, so it does nothing with its arguments and returns `nothing`.
function simple_function(out1, cube_in1, cube_in2)
    #=
    out1 .= 1
    =#
end
end

# Open the first Zarr store as a dataset and wrap it with `Cube`.
lcc_final_cube = open_dataset("/Net/Groups/BGI/scratch/dpabon/lcc_final_cube_2002_2011_8d-0.083deg.zarr")
lcc_final_cube = Cube(lcc_final_cube)


# Entries of the "classes" axis to keep in the subset below.
lcc_classes = ["Evergreen_Broadleaf_Forests", 
    "Deciduous_Broadleaf_Forests",
    "Mixed_Forest",
    "Savannas", 
    "Grasslands",
    "Croplands"]


# Subset the cube along lon, lat, time and classes.
lcc_small = lcc_final_cube[lon =(-78, -68), lat = (-5, 15), time = 2002:2003, classes = lcc_classes]



# Open the second Zarr store and wrap it with `Cube` as well.
lst_small = open_dataset("/Net/Groups/BGI/scratch/dpabon/lst_small.zarr")
lst_small = Cube(lst_small)

# Swap the "lon"/"lat" axes of `lcc_small` for the axes taken from `lst_small`
# (presumably so both cubes share identical coordinate axes; confirm).
renameaxis!(lcc_small, "lon" => getAxis("lon", lst_small))
renameaxis!(lcc_small, "lat" => getAxis("lat", lst_small))

 

# Input dimensions requested for the first cube (`lst_small`).
indims_cube_1 = InDims("time")

# Input dimensions requested for the second cube (`lcc_small`).
indims_cube_2 = InDims("time", "classes")

# Output dimensions: "time" plus a categorical "summary_stat" axis whose only
# entry is "rsquared".
out_1_dims = OutDims("time", CategoricalAxis("summary_stat", ["rsquared"]))
    

# Apply `simple_function` over both cubes. According to the report, this is the
# call that fails with the BoundsError raised on a worker.
mapCube(simple_function, (lst_small, lcc_small), indims = (indims_cube_1, indims_cube_2), outdims = out_1_dims)

This produces:

ERROR: On worker 2:
BoundsError: attempt to access 20×6×120×45 Array{Float32, 4} at index [1:20, 1, 1225:1296, 901:945]
Stacktrace:
  [1] throw_boundserror
    @ ./abstractarray.jl:703
  [2] checkbounds
    @ ./abstractarray.jl:668 [inlined]
  [3] _setindex!
    @ ./multidimensional.jl:929 [inlined]
  [4] setindex!
    @ ./abstractarray.jl:1344 [inlined]
  [5] filldata!
    @ /Net/Groups/BGI/people/dpabon/bin/julia_packages/packages/DiskArrays/f8PI0/src/batchgetindex.jl:127
  [6] #disk_getindex_batch!#38
    @ /Net/Groups/BGI/people/dpabon/bin/julia_packages/packages/DiskArrays/f8PI0/src/batchgetindex.jl:113
  [7] disk_getindex_batch
    @ /Net/Groups/BGI/people/dpabon/bin/julia_packages/packages/DiskArrays/f8PI0/src/batchgetindex.jl:121
  [8] batchgetindex
    @ /Net/Groups/BGI/people/dpabon/bin/julia_packages/packages/DiskArrays/f8PI0/src/batchgetindex.jl:72
  [9] getindex_disk
    @ /Net/Groups/BGI/people/dpabon/bin/julia_packages/packages/DiskArrays/f8PI0/src/diskarray.jl:28
 [10] getindex
    @ /Net/Groups/BGI/people/dpabon/bin/julia_packages/packages/DiskArrays/f8PI0/src/diskarray.jl:177
 [11] readblock!
    @ /Net/Groups/BGI/people/dpabon/bin/julia_packages/packages/DiskArrayTools/WsUY6/src/DiskArrayTools.jl:250
 [12] readblock!
    @ /Net/Groups/BGI/people/dpabon/bin/julia_packages/packages/DiskArrays/f8PI0/src/subarray.jl:25
 [13] getindex_disk
    @ /Net/Groups/BGI/people/dpabon/bin/julia_packages/packages/DiskArrays/f8PI0/src/diskarray.jl:31
 [14] getindex
    @ /Net/Groups/BGI/people/dpabon/bin/julia_packages/packages/DiskArrays/f8PI0/src/diskarray.jl:177
 [15] updatear
    @ /Net/Groups/BGI/people/dpabon/bin/julia_packages/packages/YAXArrays/Fe7F8/src/DAT/DAT.jl:639
 [16] #78
    @ /Net/Groups/BGI/people/dpabon/bin/julia_packages/packages/YAXArrays/Fe7F8/src/DAT/DAT.jl:571
 [17] #59
    @ ./tuple.jl:556 [inlined]
 [18] BottomRF
    @ ./reduce.jl:81 [inlined]
 [19] _foldl_impl
    @ ./reduce.jl:62
 [20] foldl_impl
    @ ./reduce.jl:48 [inlined]
 [21] mapfoldl_impl
    @ ./reduce.jl:44 [inlined]
 [22] #mapfoldl#259
    @ ./reduce.jl:170 [inlined]
 [23] #foldl#260
    @ ./reduce.jl:193 [inlined]
 [24] foreach
    @ ./tuple.jl:556 [inlined]
 [25] updatears
    @ /Net/Groups/BGI/people/dpabon/bin/julia_packages/packages/YAXArrays/Fe7F8/src/DAT/DAT.jl:568
 [26] updateinars
    @ /Net/Groups/BGI/people/dpabon/bin/julia_packages/packages/YAXArrays/Fe7F8/src/DAT/DAT.jl:656
 [27] #107
    @ /Net/Groups/BGI/people/dpabon/bin/julia_packages/packages/YAXArrays/Fe7F8/src/DAT/DAT.jl:700
 [28] fnew
    @ /Net/Groups/BGI/people/dpabon/bin/julia_packages/packages/YAXArrays/Fe7F8/src/DAT/DAT.jl:665
 [29] #56
    @ /Net/Groups/BGI/people/dpabon/bin/julia_packages/packages/ProgressMeter/sN2xr/src/ProgressMeter.jl:1016
 [30] #invokelatest#2
    @ ./essentials.jl:729
 [31] invokelatest
    @ ./essentials.jl:726
 [32] #110
    @ /Net/Groups/BGI/people/dpabon/bin/julia-1.8.5-linux-x86_64/julia-1.8.5/share/julia/stdlib/v1.8/Distributed/src/process_messages.jl:285
 [33] run_work_thunk
    @ /Net/Groups/BGI/people/dpabon/bin/julia-1.8.5-linux-x86_64/julia-1.8.5/share/julia/stdlib/v1.8/Distributed/src/process_messages.jl:70
 [34] macro expansion
    @ /Net/Groups/BGI/people/dpabon/bin/julia-1.8.5-linux-x86_64/julia-1.8.5/share/julia/stdlib/v1.8/Distributed/src/process_messages.jl:285 [inlined]
 [35] #109
    @ ./task.jl:484
Stacktrace:
  [1] (::Base.var"#939#941")(x::Task)
    @ Base ./asyncmap.jl:177
  [2] foreach(f::Base.var"#939#941", itr::Vector{Any})
    @ Base ./abstractarray.jl:2774
  [3] maptwice(wrapped_f::Function, chnl::Channel{Any}, worker_tasks::Vector{Any}, c::DiskArrays.GridChunks{2})
    @ Base ./asyncmap.jl:177
  [4] wrap_n_exec_twice
    @ ./asyncmap.jl:153 [inlined]
  [5] #async_usemap#924
    @ ./asyncmap.jl:103 [inlined]
  [6] #asyncmap#923
    @ ./asyncmap.jl:81 [inlined]
  [7] pmap(f::Function, p::WorkerPool, c::DiskArrays.GridChunks{2}; distributed::Bool, batch_size::Int64, on_error::Nothing, retry_delays::Vector{Any}, retry_check::Nothing)
    @ Distributed /Net/Groups/BGI/people/dpabon/bin/julia-1.8.5-linux-x86_64/julia-1.8.5/share/julia/stdlib/v1.8/Distributed/src/pmap.jl:126
  [8] pmap(f::Function, p::WorkerPool, c::DiskArrays.GridChunks{2})
    @ Distributed /Net/Groups/BGI/people/dpabon/bin/julia-1.8.5-linux-x86_64/julia-1.8.5/share/julia/stdlib/v1.8/Distributed/src/pmap.jl:99
  [9] macro expansion
    @ /Net/Groups/BGI/people/dpabon/bin/julia_packages/packages/ProgressMeter/sN2xr/src/ProgressMeter.jl:1015 [inlined]
 [10] macro expansion
    @ ./task.jl:454 [inlined]
 [11] macro expansion
    @ /Net/Groups/BGI/people/dpabon/bin/julia_packages/packages/ProgressMeter/sN2xr/src/ProgressMeter.jl:1014 [inlined]
 [12] macro expansion
    @ ./task.jl:454 [inlined]
 [13] progress_map(::Function, ::Vararg{Any}; mapfun::typeof(pmap), progress::ProgressMeter.Progress, channel_bufflen::Int64, kwargs::Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
    @ ProgressMeter /Net/Groups/BGI/people/dpabon/bin/julia_packages/packages/ProgressMeter/sN2xr/src/ProgressMeter.jl:1007
 [14] #progress_pmap#60
    @ /Net/Groups/BGI/people/dpabon/bin/julia_packages/packages/ProgressMeter/sN2xr/src/ProgressMeter.jl:1032 [inlined]
 [15] pmap_with_data(f::Function, p::WorkerPool, c::DiskArrays.GridChunks{2}; initfunc::Function, progress::ProgressMeter.Progress, kwargs::Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
    @ YAXArrays.DAT /Net/Groups/BGI/people/dpabon/bin/julia_packages/packages/YAXArrays/au5n4/src/DAT/DAT.jl:668
 [16] pmap_with_data(f::Function, c::DiskArrays.GridChunks{2}; initfunc::Function, kwargs::Base.Pairs{Symbol, ProgressMeter.Progress, Tuple{Symbol}, NamedTuple{(:progress,), Tuple{ProgressMeter.Progress}}})
    @ YAXArrays.DAT /Net/Groups/BGI/people/dpabon/bin/julia_packages/packages/YAXArrays/au5n4/src/DAT/DAT.jl:673
 [17] runLoop(dc::YAXArrays.DAT.DATConfig{2, 1}, showprog::Bool)
    @ YAXArrays.DAT /Net/Groups/BGI/people/dpabon/bin/julia_packages/packages/YAXArrays/au5n4/src/DAT/DAT.jl:698
 [18] mapCube(::typeof(simple_function), ::Tuple{YAXArray{Union{Missing, Float32}, 3, DiskArrayTools.CFDiskArray{Float32, 3, Float32, DiskArrays.SubDiskArray{Float32, 3}}, Vector{RangeAxis}}, YAXArray{Union{Missing, Float32}, 4, DiskArrays.SubDiskArray{Union{Missing, Float32}, 4}, Vector{CubeAxis}}}; max_cache::Float64, indims::Tuple{InDims, InDims}, outdims::OutDims, inplace::Bool, ispar::Bool, debug::Bool, include_loopvars::Bool, showprog::Bool, irregular_loopranges::Bool, nthreads::Dict{Int64, Int64}, loopchunksize::Dict{Any, Any}, kwargs::Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
    @ YAXArrays.DAT /Net/Groups/BGI/people/dpabon/bin/julia_packages/packages/YAXArrays/au5n4/src/DAT/DAT.jl:475
 [19] top-level scope
    @ ~/dpabon/nfdi4earth_oemc/bin/julia/exploratory_analysis/minimal_example_batch_issue.jl:50

Probably this is a YAXArrays.jl or DiskArrayTools.jl issue?

Not really, because the error points first to DiskArrays. I made a mistake in the code above: I forgot to run the following on every worker (inside `@everywhere`):

using Pkg

Pkg.activate("/Net/Groups/BGI/people/dpabon/nfdi4earth_oemc")

I have edited the example above accordingly.

OK, for some reason the environment was not properly activated on every worker, so some workers loaded the previous version (0.3.8) of DiskArrays. This is solved by calling `Pkg.instantiate()` after activating the environment:

# Fixed setup: activate the project on the main process, then instantiate it.
# Per the discussion, adding `Pkg.instantiate()` is what stopped some workers
# from loading an older DiskArrays version.
using Pkg
Pkg.activate("/Net/Groups/BGI/people/dpabon/nfdi4earth_oemc")
Pkg.instantiate()
using YAXArrays
using Zarr


# NOTE(review): `addprocs` and `@everywhere` below come from Distributed, which
# this snippet does not load explicitly; presumably it is loaded elsewhere.
using SlurmClusterManager

# NOTE(review): no Slurm manager is passed to `addprocs` here, so this looks
# like it starts 10 local workers; confirm that is intended.
addprocs(10)
# The block below is evaluated on every process, so each worker activates the
# project and loads the packages itself.
@everywhere begin
  using Pkg
  Pkg.activate("/Net/Groups/BGI/people/dpabon/nfdi4earth_oemc")
   using YAXArrays
  using Zarr
  # Placeholder function for `mapCube`. Its body is entirely commented out, so
  # it does nothing with its arguments and returns `nothing`.
  function simple_function(out1, cube_in1, cube_in2)
      #=
      out1 .= 1
      =#
  end
end

Thanks to @meggart for the solution!