JuliaPluto/PlutoSliderServer.jl

Notebooks stop being responsive after a while

Opened this issue · 4 comments

jbrea commented

From time to time I get an email from students saying that some notebook on https://bio322.epfl.ch is no longer interactive, i.e. the page constantly shows the loading bar and the sliders no longer work. Usually I just push an empty commit to trigger a reload of the page, and then it works again for a while (days to weeks). One error message that I often see in my logs is the following (I'm not sure it is related, though):

Distributed.ProcessExitedException(3)
Stacktrace:
  [1] worker_from_id(pg::Distributed.ProcessGroup, i::Int64)
    @ Distributed /usr/local/julia/share/julia/stdlib/v1.9/Distributed/src/cluster.jl:1093
  [2] worker_from_id
    @ /usr/local/julia/share/julia/stdlib/v1.9/Distributed/src/cluster.jl:1090 [inlined]
  [3] #remotecall_fetch#162
    @ /usr/local/julia/share/julia/stdlib/v1.9/Distributed/src/remotecall.jl:492 [inlined]
  [4] remotecall_fetch
    @ /usr/local/julia/share/julia/stdlib/v1.9/Distributed/src/remotecall.jl:492 [inlined]
  [5] remotecall_eval
    @ /usr/local/julia/share/julia/stdlib/v1.9/Distributed/src/macros.jl:242 [inlined]
  [6] create_emptyworkspacemodule(pid::Int64)
    @ Pluto.WorkspaceManager ~/.julia/packages/Pluto/8HhXY/src/evaluation/WorkspaceManager.jl:280
  [7] bump_workspace_module(session_notebook::Tuple{Pluto.ServerSession, Pluto.Notebook})
    @ Pluto.WorkspaceManager ~/.julia/packages/Pluto/8HhXY/src/evaluation/WorkspaceManager.jl:258
  [8] run_reactive_core!(session::Pluto.ServerSession, notebook::Pluto.Notebook, old_topology::Pluto.NotebookTopology, new_topology::Pluto.NotebookTopology, roots::Vector{Pluto.Cell}; save::Bool, deletion_hook::Pluto.var"#custom_deletion_hook#328"{Pluto.var"#custom_deletion_hook#324#329"{Set{Symbol}, Vector{Any}, Vector{Symbol}}}, user_requested_run::Bool, already_run::Vector{Pluto.Cell}, bond_value_pairs::Base.Iterators.Zip{Tuple{Vector{Symbol}, Vector{Any}}})
    @ Pluto ~/.julia/packages/Pluto/8HhXY/src/evaluation/Run.jl:59
  [9] run_reactive_core!
    @ ~/.julia/packages/Pluto/8HhXY/src/evaluation/Run.jl:44 [inlined]
 [10] #267
    @ ~/.julia/packages/Pluto/8HhXY/src/evaluation/Run.jl:22 [inlined]
 [11] withtoken(f::Pluto.var"#267#268"{Bool, Pluto.var"#custom_deletion_hook#328"{Pluto.var"#custom_deletion_hook#324#329"{Set{Symbol}, Vector{Any}, Vector{Symbol}}}, Bool, Base.Iterators.Zip{Tuple{Vector{Symbol}, Vector{Any}}}, Pluto.ServerSession, Pluto.Notebook, Pluto.NotebookTopology, Pluto.NotebookTopology, Vector{Pluto.Cell}}, token::Pluto.Token)
    @ Pluto ~/.julia/packages/Pluto/8HhXY/src/evaluation/Tokens.jl:19
 [12] #run_reactive!#266
    @ ~/.julia/packages/Pluto/8HhXY/src/evaluation/Run.jl:21 [inlined]
 [13] run_reactive!
    @ ~/.julia/packages/Pluto/8HhXY/src/evaluation/Run.jl:10 [inlined]
 [14] #288
    @ ~/.julia/packages/Pluto/8HhXY/src/evaluation/Run.jl:261 [inlined]
 [15] macro expansion
    @ ~/.julia/packages/Pluto/8HhXY/src/evaluation/Tokens.jl:58 [inlined]
 [16] (::Pluto.var"#290#291"{Pluto.var"#288#289"{Base.Pairs{Symbol, Any, NTuple{4, Symbol}, NamedTuple{(:deletion_hook, :save, :user_requested_run, :bond_value_pairs), Tuple{Pluto.var"#custom_deletion_hook#328"{Pluto.var"#custom_deletion_hook#324#329"{Set{Symbol}, Vector{Any}, Vector{Symbol}}}, Bool, Bool, Base.Iterators.Zip{Tuple{Vector{Symbol}, Vector{Any}}}}}}, Pluto.ServerSession, Pluto.Notebook, Pluto.NotebookTopology, Pluto.NotebookTopology, Vector{Pluto.Cell}}})()
    @ Pluto ./task.jl:514
┌ Error: Failed to set bond values
│   exception =
│    TaskFailedException
│    Stacktrace:
│      [1] wait
│        @ ./task.jl:349 [inlined]
│      [2] fetch
│        @ ./task.jl:369 [inlined]
│      [3] maybe_async(f::Pluto.var"#288#289"{Base.Pairs{Symbol, Any, NTuple{4, Symbol}, NamedTuple{(:deletion_hook, :save, :user_requested_run, :bond_value_pairs), Tuple{Pluto.var"#custom_deletion_hook#328"{Pluto.var"#custom_deletion_hook#324#329"{Set{Symbol}, Vector{Any}, Vector{Symbol}}}, Bool, Bool, Base.Iterators.Zip{Tuple{Vector{Symbol}, Vector{Any}}}}}}, Pluto.ServerSession, Pluto.Notebook, Pluto.NotebookTopology, Pluto.NotebookTopology, Vector{Pluto.Cell}}, async::Bool)
│        @ Pluto ~/.julia/packages/Pluto/8HhXY/src/evaluation/Run.jl:270
│      [4] run_reactive_async!(session::Pluto.ServerSession, notebook::Pluto.Notebook, old::Pluto.NotebookTopology, new::Pluto.NotebookTopology, to_run::Vector{Pluto.Cell}; run_async::Bool, kwargs::Base.Pairs{Symbol, Any, NTuple{4, Symbol}, NamedTuple{(:deletion_hook, :save, :user_requested_run, :bond_value_pairs), Tuple{Pluto.var"#custom_deletion_hook#328"{Pluto.var"#custom_deletion_hook#324#329"{Set{Symbol}, Vector{Any}, Vector{Symbol}}}, Bool, Bool, Base.Iterators.Zip{Tuple{Vector{Symbol}, Vector{Any}}}}}})
│        @ Pluto ~/.julia/packages/Pluto/8HhXY/src/evaluation/Run.jl:260
│      [5] run_reactive_async!
│        @ ~/.julia/packages/Pluto/8HhXY/src/evaluation/Run.jl:259 [inlined]
│      [6] #run_reactive_async!#286
│        @ ~/.julia/packages/Pluto/8HhXY/src/evaluation/Run.jl:257 [inlined]
│      [7] run_reactive_async!
│        @ ~/.julia/packages/Pluto/8HhXY/src/evaluation/Run.jl:257 [inlined]
│      [8] set_bond_values_reactive(; session::Pluto.ServerSession, notebook::Pluto.Notebook, bound_sym_names::Vector{Symbol}, is_first_values::Vector{Bool}, initiator::Nothing, kwargs::Base.Pairs{Symbol, Bool, Tuple{Symbol}, NamedTuple{(:run_async,), Tuple{Bool}}})
│        @ Pluto ~/.julia/packages/Pluto/8HhXY/src/evaluation/RunBonds.jl:60
│      [9] set_bond_values_reactive
│        @ ~/.julia/packages/Pluto/8HhXY/src/evaluation/RunBonds.jl:1 [inlined]
│     [10] (::PlutoSliderServer.var"../HTTPRouter.jl".var"#4#16"{Dict{Symbol, Any}, Pluto.Notebook, Pluto.ServerSession})()
│        @ PlutoSliderServer.var"../HTTPRouter.jl" ~/.julia/packages/PlutoSliderServer/N6YBS/src/HTTPRouter.jl:116
│     [11] withtoken(f::PlutoSliderServer.var"../HTTPRouter.jl".var"#4#16"{Dict{Symbol, Any}, Pluto.Notebook, Pluto.ServerSession}, token::Pluto.Token)
│        @ Pluto ~/.julia/packages/Pluto/8HhXY/src/evaluation/Tokens.jl:19
│     [12] (::PlutoSliderServer.var"../HTTPRouter.jl".var"#serve_staterequest#15"{PlutoSliderServer.var"../Configuration.jl".PlutoDeploySettings, Pluto.ServerSession, PlutoSliderServer.var"../HTTPRouter.jl".var"#get_bonds#13", PlutoSliderServer.var"../HTTPRouter.jl".var"#get_sesh#11"{Vector{PlutoSliderServer.var"../Types.jl".NotebookSession}}, PlutoSliderServer.var"../HTTPRouter.jl".var"#27#28"{String}})(request::HTTP.Messages.Request)
│        @ PlutoSliderServer.var"../HTTPRouter.jl" ~/.julia/packages/PlutoSliderServer/N6YBS/src/HTTPRouter.jl:110
│     [13] (::HTTP.Handlers.Router{typeof(HTTP.Handlers.default404), typeof(HTTP.Handlers.default405), Nothing})(req::HTTP.Messages.Request)
│        @ HTTP.Handlers ~/.julia/packages/HTTP/SN7VW/src/Handlers.jl:439
│     [14] (::PlutoSliderServer.var"../HTTPRouter.jl".var"#29#30"{HTTP.Handlers.Router{typeof(HTTP.Handlers.default404), typeof(HTTP.Handlers.default405), Nothing}})(req::HTTP.Messages.Request)
│        @ PlutoSliderServer.var"../HTTPRouter.jl" ~/.julia/packages/PlutoSliderServer/N6YBS/src/HTTPRouter.jl:308
│     [15] (::HTTP.Handlers.var"#1#2"{PlutoSliderServer.var"../HTTPRouter.jl".var"#29#30"{HTTP.Handlers.Router{typeof(HTTP.Handlers.default404), typeof(HTTP.Handlers.default405), Nothing}}})(stream::HTTP.Streams.Stream{HTTP.Messages.Request, HTTP.Connections.Connection{Sockets.TCPSocket}})
│        @ HTTP.Handlers ~/.julia/packages/HTTP/SN7VW/src/Handlers.jl:58
│     [16] #invokelatest#2
│        @ ./essentials.jl:819 [inlined]
│     [17] invokelatest
│        @ ./essentials.jl:816 [inlined]
│     [18] handle_connection(f::Function, c::HTTP.Connections.Connection{Sockets.TCPSocket}, listener::HTTP.Servers.Listener{Nothing, Sockets.TCPServer}, readtimeout::Int64, access_log::Nothing)
│        @ HTTP.Servers ~/.julia/packages/HTTP/SN7VW/src/Servers.jl:450
│     [19] macro expansion
│        @ ~/.julia/packages/HTTP/SN7VW/src/Servers.jl:386 [inlined]
│     [20] (::HTTP.Servers.var"#16#17"{HTTP.Handlers.var"#1#2"{PlutoSliderServer.var"../HTTPRouter.jl".var"#29#30"{HTTP.Handlers.Router{typeof(HTTP.Handlers.default404), typeof(HTTP.Handlers.default405), Nothing}}}, HTTP.Servers.Listener{Nothing, Sockets.TCPServer}, Set{HTTP.Connections.Connection}, Int64, Nothing, Base.Semaphore, HTTP.Connections.Connection{Sockets.TCPSocket}})()
│        @ HTTP.Servers ./task.jl:514
│    
│        nested task error: Distributed.ProcessExitedException(3)
│        Stacktrace:
│          [1] worker_from_id(pg::Distributed.ProcessGroup, i::Int64)
│            @ Distributed /usr/local/julia/share/julia/stdlib/v1.9/Distributed/src/cluster.jl:1093
│          [2] worker_from_id
│            @ /usr/local/julia/share/julia/stdlib/v1.9/Distributed/src/cluster.jl:1090 [inlined]
│          [3] #remotecall_fetch#162
│            @ /usr/local/julia/share/julia/stdlib/v1.9/Distributed/src/remotecall.jl:492 [inlined]
│          [4] remotecall_fetch
│            @ /usr/local/julia/share/julia/stdlib/v1.9/Distributed/src/remotecall.jl:492 [inlined]
│          [5] remotecall_eval
│            @ /usr/local/julia/share/julia/stdlib/v1.9/Distributed/src/macros.jl:242 [inlined]
│          [6] create_emptyworkspacemodule(pid::Int64)
│            @ Pluto.WorkspaceManager ~/.julia/packages/Pluto/8HhXY/src/evaluation/WorkspaceManager.jl:280
│          [7] bump_workspace_module(session_notebook::Tuple{Pluto.ServerSession, Pluto.Notebook})
│            @ Pluto.WorkspaceManager ~/.julia/packages/Pluto/8HhXY/src/evaluation/WorkspaceManager.jl:258
│          [8] run_reactive_core!(session::Pluto.ServerSession, notebook::Pluto.Notebook, old_topology::Pluto.NotebookTopology, new_topology::Pluto.NotebookTopology, roots::Vector{Pluto.Cell}; save::Bool, deletion_hook::Pluto.var"#custom_deletion_hook#328"{Pluto.var"#custom_deletion_hook#324#329"{Set{Symbol}, Vector{Any}, Vector{Symbol}}}, user_requested_run::Bool, already_run::Vector{Pluto.Cell}, bond_value_pairs::Base.Iterators.Zip{Tuple{Vector{Symbol}, Vector{Any}}})
│            @ Pluto ~/.julia/packages/Pluto/8HhXY/src/evaluation/Run.jl:59
│          [9] run_reactive_core!
│            @ ~/.julia/packages/Pluto/8HhXY/src/evaluation/Run.jl:44 [inlined]
│         [10] #267
│            @ ~/.julia/packages/Pluto/8HhXY/src/evaluation/Run.jl:22 [inlined]
│         [11] withtoken(f::Pluto.var"#267#268"{Bool, Pluto.var"#custom_deletion_hook#328"{Pluto.var"#custom_deletion_hook#324#329"{Set{Symbol}, Vector{Any}, Vector{Symbol}}}, Bool, Base.Iterators.Zip{Tuple{Vector{Symbol}, Vector{Any}}}, Pluto.ServerSession, Pluto.Notebook, Pluto.NotebookTopology, Pluto.NotebookTopology, Vector{Pluto.Cell}}, token::Pluto.Token)
│            @ Pluto ~/.julia/packages/Pluto/8HhXY/src/evaluation/Tokens.jl:19
│         [12] #run_reactive!#266
│            @ ~/.julia/packages/Pluto/8HhXY/src/evaluation/Run.jl:21 [inlined]
│         [13] run_reactive!
│            @ ~/.julia/packages/Pluto/8HhXY/src/evaluation/Run.jl:10 [inlined]
│         [14] #288
│            @ ~/.julia/packages/Pluto/8HhXY/src/evaluation/Run.jl:261 [inlined]
│         [15] macro expansion
│            @ ~/.julia/packages/Pluto/8HhXY/src/evaluation/Tokens.jl:58 [inlined]
│         [16] (::Pluto.var"#290#291"{Pluto.var"#288#289"{Base.Pairs{Symbol, Any, NTuple{4, Symbol}, NamedTuple{(:deletion_hook, :save, :user_requested_run, :bond_value_pairs), Tuple{Pluto.var"#custom_deletion_hook#328"{Pluto.var"#custom_deletion_hook#324#329"{Set{Symbol}, Vector{Any}, Vector{Symbol}}}, Bool, Bool, Base.Iterators.Zip{Tuple{Vector{Symbol}, Vector{Any}}}}}}, Pluto.ServerSession, Pluto.Notebook, Pluto.NotebookTopology, Pluto.NotebookTopology, Vector{Pluto.Cell}}})()
│            @ Pluto ./task.jl:514
└ @ PlutoSliderServer.var"../HTTPRouter.jl" /home/MLCourse/.julia/packages/PlutoSliderServer/N6YBS/src/HTTPRouter.jl:128

Another error I have in the logs is

UNHANDLED TASK ERROR: EOFError: read end of file
Stacktrace:
 [1] (::Base.var"#wait_locked#715")(s::Sockets.TCPSocket, buf::IOBuffer, nb::Int64)
   @ Base ./stream.jl:947
 [2] unsafe_read(s::Sockets.TCPSocket, p::Ptr{UInt8}, nb::UInt64)
   @ Base ./stream.jl:955
 [3] unsafe_read
   @ ./io.jl:761 [inlined]
 [4] unsafe_read(s::Sockets.TCPSocket, p::Base.RefValue{NTuple{4, Int64}}, n::Int64)
   @ Base ./io.jl:760
 [5] read!
   @ ./io.jl:762 [inlined]
 [6] deserialize_hdr_raw
   @ /usr/local/julia/share/julia/stdlib/v1.9/Distributed/src/messages.jl:167 [inlined]
 [7] message_handler_loop(r_stream::Sockets.TCPSocket, w_stream::Sockets.TCPSocket, incoming::Bool)
   @ Distributed /usr/local/julia/share/julia/stdlib/v1.9/Distributed/src/process_messages.jl:172
 [8] process_tcp_streams(r_stream::Sockets.TCPSocket, w_stream::Sockets.TCPSocket, incoming::Bool)
   @ Distributed /usr/local/julia/share/julia/stdlib/v1.9/Distributed/src/process_messages.jl:133
 [9] (::Distributed.var"#103#104"{Sockets.TCPSocket, Sockets.TCPSocket, Bool})()
   @ Distributed ./task.jl:514

@pankgeorg We talked about this a couple times, could you write down what you think about this?

Any news on this @pankgeorg @fonsp ?

I mentioned this before: I have similar issues. My guess has always been that it has something to do with my VM and memory ballooning, but I have never debugged it further.

It would be interesting to monitor memory usage, as Pluto[SliderServer] "leaks" memory by not being able to free certain types of objects.