MilesCranmer/SymbolicRegression.jl

[BUG]: TaskFailedException when using eval_grad_tree_array and batching=true

Opened this issue · 3 comments

What happened?

function loss_fnc(tree, dataset::Dataset{T,L}, options, idx) where {T,L}    
    # Extract data for the given indices
    X = idx === nothing ? dataset.X : view(dataset.X, :, idx)
    y = idx === nothing ? dataset.y : view(dataset.y, idx)
    weights = idx === nothing ? dataset.weights : view(dataset.weights, idx)
    prediction, grad, complete = eval_grad_tree_array(tree, X, options;variable=true)
    if !complete
        return L(Inf)
    end
    println("Size of grad: ", size(grad))
   #....remainingcode

This throws a nested task error: TaskFailedException when batching=true. Furthermore, the size of grad is (6, 4927) — the full dataset — rather than the expected (6, 50) batch size for 6 features.
The code works if I turn off batching.

If I switch to diff instead of grad for one feature:

  prediction, diff, complete = eval_diff_tree_array(tree, X, options,3)
    if !complete
        return L(Inf)
    end
    println("Size of diff: ", size(diff))

This now correctly works with batching and outputs the expected size of 50.

The error triggers shortly after Info: Started!
I am on Julia version 1.10.4

Initialisation:

model = SRRegressor(
    niterations=1000000,
    binary_operators=[+,-,*,/],
    maxsize=60,
    bumper=true,
    turbo=true,
    #warm_start=true,
    populations=18,
    population_size=100,
    parsimony = 0.01,
    batching=true,
    loss_function = loss_fnc,
)
mach = machine(model, x, y,weightsIn)
fit!(mach)

I apologise if I am overlooking something.

Version

v1.0.0-beta1

Operating System

Windows

Interface

Julia REPL

Relevant log output

1-element ExceptionStack:
TaskFailedException
Stacktrace:
  [1] wait
    @ .\task.jl:352 [inlined]
  [2] fetch
    @ .\task.jl:372 [inlined]
  [3] _main_search_loop!(state::SymbolicRegression.SearchUtilsModule.SearchState{Float32, Float32, Expression{Float32, DynamicExpressions.NodeModule.Node{Float32}, @NamedTuple{operators::Nothing, variable_names::Nothing}}, Task, Channel}, datasets::Vector{Dataset{Float32, Float32, Matrix{Float32}, Vector{Float32}, Vector{Float32}, @NamedTuple{}, Nothing, Nothing, Nothing, Nothing}}, ropt::SymbolicRegression.SearchUtilsModule.RuntimeOptions{:multithreading, 1, true}, options::Options{SymbolicRegression.CoreModule.OptionsStructModule.ComplexityMapping{Int64, Int64}, DynamicExpressions.OperatorEnumModule.OperatorEnum, DynamicExpressions.NodeModule.Node, Expression, @NamedTuple{}, true, true, nothing, Nothing})
    @ SymbolicRegression C:\Users\georg\.julia\packages\SymbolicRegression\TMsdA\src\SymbolicRegression.jl:912
  [4] _equation_search(datasets::Vector{Dataset{Float32, Float32, Matrix{Float32}, Vector{Float32}, Vector{Float32}, @NamedTuple{}, Nothing, Nothing, Nothing, Nothing}}, ropt::SymbolicRegression.SearchUtilsModule.RuntimeOptions{:multithreading, 1, true}, options::Options{SymbolicRegression.CoreModule.OptionsStructModule.ComplexityMapping{Int64, Int64}, DynamicExpressions.OperatorEnumModule.OperatorEnum, DynamicExpressions.NodeModule.Node, Expression, @NamedTuple{}, true, true, nothing, Nothing}, saved_state::Nothing)
    @ SymbolicRegression C:\Users\georg\.julia\packages\SymbolicRegression\TMsdA\src\SymbolicRegression.jl:629
  [5] equation_search(datasets::Vector{Dataset{Float32, Float32, Matrix{Float32}, Vector{Float32}, Vector{Float32}, @NamedTuple{}, Nothing, Nothing, Nothing, Nothing}}; niterations::Int64, options::Options{SymbolicRegression.CoreModule.OptionsStructModule.ComplexityMapping{Int64, Int64}, DynamicExpressions.OperatorEnumModule.OperatorEnum, DynamicExpressions.NodeModule.Node, Expression, @NamedTuple{}, true, true, nothing, Nothing}, parallelism::Symbol, numprocs::Nothing, procs::Nothing, addprocs_function::Nothing, heap_size_hint_in_bytes::Nothing, runtests::Bool, saved_state::Nothing, return_state::Bool, verbosity::Int64, progress::Nothing, v_dim_out::Val{1})
    @ SymbolicRegression C:\Users\georg\.julia\packages\SymbolicRegression\TMsdA\src\SymbolicRegression.jl:601
  [6] equation_search
    @ C:\Users\georg\.julia\packages\SymbolicRegression\TMsdA\src\SymbolicRegression.jl:479 [inlined]
  [7] equation_search(X::Matrix{Float32}, y::Matrix{Float32}; niterations::Int64, weights::Vector{Float32}, options::Options{SymbolicRegression.CoreModule.OptionsStructModule.ComplexityMapping{Int64, Int64}, DynamicExpressions.OperatorEnumModule.OperatorEnum, DynamicExpressions.NodeModule.Node, Expression, @NamedTuple{}, true, true, nothing, Nothing}, variable_names::Vector{String}, display_variable_names::Vector{String}, y_variable_names::Nothing, parallelism::Symbol, numprocs::Nothing, procs::Nothing, addprocs_function::Nothing, heap_size_hint_in_bytes::Nothing, runtests::Bool, saved_state::Nothing, return_state::Bool, loss_type::Type{Nothing}, verbosity::Int64, progress::Nothing, X_units::Nothing, y_units::Nothing, extra::@NamedTuple{}, v_dim_out::Val{1}, multithreaded::Nothing, varMap::Nothing)
    @ SymbolicRegression C:\Users\georg\.julia\packages\SymbolicRegression\TMsdA\src\SymbolicRegression.jl:442
  [8] #equation_search#28
    @ C:\Users\georg\.julia\packages\SymbolicRegression\TMsdA\src\SymbolicRegression.jl:472 [inlined]
  [9] _update(m::SRRegressor{DynamicQuantities.SymbolicDimensions{DynamicQuantities.FixedRational{Int32, 25200}}, DataType}, verbosity::Int64, old_fitresult::Nothing, old_cache::Nothing, X::Matrix{Float32}, y::Vector{Float32}, w::Vector{Float32}, options::Options{SymbolicRegression.CoreModule.OptionsStructModule.ComplexityMapping{Int64, Int64}, DynamicExpressions.OperatorEnumModule.OperatorEnum, DynamicExpressions.NodeModule.Node, Expression, @NamedTuple{}, true, true, nothing, Nothing}, classes::Nothing)
    @ SymbolicRegression.MLJInterfaceModule C:\Users\georg\.julia\packages\SymbolicRegression\TMsdA\src\MLJInterface.jl:191
 [10] update(m::SRRegressor{DynamicQuantities.SymbolicDimensions{DynamicQuantities.FixedRational{Int32, 25200}}, DataType}, verbosity::Int64, old_fitresult::Nothing, old_cache::Nothing, X::Matrix{Float32}, y::Vector{Float32}, w::Vector{Float32})
    @ SymbolicRegression.MLJInterfaceModule C:\Users\georg\.julia\packages\SymbolicRegression\TMsdA\src\MLJInterface.jl:142
 [11] fit(m::SRRegressor{DynamicQuantities.SymbolicDimensions{DynamicQuantities.FixedRational{Int32, 25200}}, DataType}, verbosity::Int64, X::Matrix{Float32}, y::Vector{Float32}, w::Vector{Float32})
    @ SymbolicRegression.MLJInterfaceModule C:\Users\georg\.julia\packages\SymbolicRegression\TMsdA\src\MLJInterface.jl:136
 [12] fit_only!(mach::Machine{SRRegressor{DynamicQuantities.SymbolicDimensions{DynamicQuantities.FixedRational{Int32, 25200}}, DataType}, SRRegressor{DynamicQuantities.SymbolicDimensions{DynamicQuantities.FixedRational{Int32, 25200}}, DataType}, true}; rows::Nothing, verbosity::Int64, force::Bool, composite::Nothing)
    @ MLJBase C:\Users\georg\.julia\packages\MLJBase\7nGJF\src\machines.jl:692
 [13] fit_only!
    @ C:\Users\georg\.julia\packages\MLJBase\7nGJF\src\machines.jl:617 [inlined]
 [14] #fit!#63
    @ C:\Users\georg\.julia\packages\MLJBase\7nGJF\src\machines.jl:789 [inlined]
 [15] fit!(mach::Machine{SRRegressor{DynamicQuantities.SymbolicDimensions{DynamicQuantities.FixedRational{Int32, 25200}}, DataType}, SRRegressor{DynamicQuantities.SymbolicDimensions{DynamicQuantities.FixedRational{Int32, 25200}}, DataType}, true})
    @ MLJBase C:\Users\georg\.julia\packages\MLJBase\7nGJF\src\machines.jl:786
 [16] top-level scope
    @ REPL[45]:1

    nested task error: TaskFailedException
    Stacktrace:
     [1] wait
       @ .\task.jl:352 [inlined]
     [2] fetch
       @ .\task.jl:372 [inlined]
     [3] (::SymbolicRegression.var"#63#68"{SymbolicRegression.SearchUtilsModule.SearchState{Float32, Float32, Expression{Float32, DynamicExpressions.NodeModule.Node{Float32}, @NamedTuple{operators::Nothing, variable_names::Nothing}}, Task, Channel}, Int64, Int64})()
       @ SymbolicRegression C:\Users\georg\.julia\packages\SymbolicRegression\TMsdA\src\SymbolicRegression.jl:889

        nested task error: TypeError: in typeassert, expected Tuple{Vector{Float32}, SubArray{Float32, 2, Matrix{Float32}, Tuple{Base.Slice{Base.OneTo{Int64}}, Vector{Int64}}, false}, Bool}, got a value of type Tuple{Vector{Float32}, LinearAlgebra.Adjoint{Float32, Matrix{Float32}}, Bool}
        Stacktrace:
          [1] eval_grad_tree_array(tree::DynamicExpressions.NodeModule.Node{Float32}, X::SubArray{Float32, 2, Matrix{Float32}, Tuple{Base.Slice{Base.OneTo{Int64}}, Vector{Int64}}, false}, options::Options{SymbolicRegression.CoreModule.OptionsStructModule.ComplexityMapping{Int64, Int64}, DynamicExpressions.OperatorEnumModule.OperatorEnum, DynamicExpressions.NodeModule.Node, Expression, @NamedTuple{}, true, true, nothing, Nothing}; kws::@Kwargs{variable::Bool})
            @ SymbolicRegression.InterfaceDynamicExpressionsModule C:\Users\georg\.julia\packages\SymbolicRegression\TMsdA\src\InterfaceDynamicExpressions.jl:155
          [2] eval_grad_tree_array
            @ C:\Users\georg\.julia\packages\SymbolicRegression\TMsdA\src\InterfaceDynamicExpressions.jl:147 [inlined]
          [3] loss_fnc(tree::DynamicExpressions.NodeModule.Node{Float32}, dataset::Dataset{Float32, Float32, Matrix{Float32}, Vector{Float32}, Vector{Float32}, @NamedTuple{}, Nothing, Nothing, Nothing, Nothing}, options::Options{SymbolicRegression.CoreModule.OptionsStructModule.ComplexityMapping{Int64, Int64}, DynamicExpressions.OperatorEnumModule.OperatorEnum, DynamicExpressions.NodeModule.Node, Expression, @NamedTuple{}, true, true, nothing, Nothing}, idx::Vector{Int64})
            @ Main .\REPL[42]:6
          [4] evaluator(f::typeof(loss_fnc), tree::DynamicExpressions.NodeModule.Node{Float32}, dataset::Dataset{Float32, Float32, Matrix{Float32}, Vector{Float32}, Vector{Float32}, @NamedTuple{}, Nothing, Nothing, Nothing, Nothing}, options::Options{SymbolicRegression.CoreModule.OptionsStructModule.ComplexityMapping{Int64, Int64}, DynamicExpressions.OperatorEnumModule.OperatorEnum, DynamicExpressions.NodeModule.Node, Expression, @NamedTuple{}, true, true, nothing, Nothing}, idx::Vector{Int64})
            @ SymbolicRegression.LossFunctionsModule C:\Users\georg\.julia\packages\SymbolicRegression\TMsdA\src\LossFunctions.jl:91
          [5] eval_loss(tree::Expression{Float32, DynamicExpressions.NodeModule.Node{Float32}, @NamedTuple{operators::Nothing, variable_names::Nothing}}, dataset::Dataset{Float32, Float32, Matrix{Float32}, Vector{Float32}, Vector{Float32}, @NamedTuple{}, Nothing, Nothing, Nothing, Nothing}, options::Options{SymbolicRegression.CoreModule.OptionsStructModule.ComplexityMapping{Int64, Int64}, DynamicExpressions.OperatorEnumModule.OperatorEnum, DynamicExpressions.NodeModule.Node, Expression, @NamedTuple{}, true, true, nothing, Nothing}; regularization::Bool, idx::Vector{Int64})
            @ SymbolicRegression.LossFunctionsModule C:\Users\georg\.julia\packages\SymbolicRegression\TMsdA\src\LossFunctions.jl:116
          [6] eval_loss
            @ C:\Users\georg\.julia\packages\SymbolicRegression\TMsdA\src\LossFunctions.jl:105 [inlined]
          [7] #eval_loss_batched#4
            @ C:\Users\georg\.julia\packages\SymbolicRegression\TMsdA\src\LossFunctions.jl:130 [inlined]
          [8] eval_loss_batched
            @ C:\Users\georg\.julia\packages\SymbolicRegression\TMsdA\src\LossFunctions.jl:122 [inlined]
          [9] score_func_batched(dataset::Dataset{Float32, Float32, Matrix{Float32}, Vector{Float32}, Vector{Float32}, @NamedTuple{}, Nothing, Nothing, Nothing, Nothing}, member::PopMember{Float32, Float32, Expression{Float32, DynamicExpressions.NodeModule.Node{Float32}, @NamedTuple{operators::Nothing, variable_names::Nothing}}}, options::Options{SymbolicRegression.CoreModule.OptionsStructModule.ComplexityMapping{Int64, Int64}, DynamicExpressions.OperatorEnumModule.OperatorEnum, DynamicExpressions.NodeModule.Node, Expression, @NamedTuple{}, true, true, nothing, Nothing}; complexity::Nothing, idx::Nothing)
            @ SymbolicRegression.LossFunctionsModule C:\Users\georg\.julia\packages\SymbolicRegression\TMsdA\src\LossFunctions.jl:192
         [10] score_func_batched
            @ C:\Users\georg\.julia\packages\SymbolicRegression\TMsdA\src\LossFunctions.jl:185 [inlined]
         [11] next_generation(dataset::Dataset{Float32, Float32, Matrix{Float32}, Vector{Float32}, Vector{Float32}, @NamedTuple{}, Nothing, Nothing, Nothing, Nothing}, member::PopMember{Float32, Float32, Expression{Float32, DynamicExpressions.NodeModule.Node{Float32}, @NamedTuple{operators::Nothing, variable_names::Nothing}}}, temperature::Float64, curmaxsize::Int64, running_search_statistics::SymbolicRegression.AdaptiveParsimonyModule.RunningSearchStatistics, options::Options{SymbolicRegression.CoreModule.OptionsStructModule.ComplexityMapping{Int64, Int64}, DynamicExpressions.OperatorEnumModule.OperatorEnum, DynamicExpressions.NodeModule.Node, Expression, @NamedTuple{}, true, true, nothing, Nothing}; tmp_recorder::Dict{String, Any})
            @ SymbolicRegression.MutateModule C:\Users\georg\.julia\packages\SymbolicRegression\TMsdA\src\Mutate.jl:126
         [12] next_generation
            @ C:\Users\georg\.julia\packages\SymbolicRegression\TMsdA\src\Mutate.jl:108 [inlined]
         [13] reg_evol_cycle(dataset::Dataset{Float32, Float32, Matrix{Float32}, Vector{Float32}, Vector{Float32}, @NamedTuple{}, Nothing, Nothing, Nothing, Nothing}, pop::Population{Float32, Float32, Expression{Float32, DynamicExpressions.NodeModule.Node{Float32}, @NamedTuple{operators::Nothing, variable_names::Nothing}}}, temperature::Float64, curmaxsize::Int64, running_search_statistics::SymbolicRegression.AdaptiveParsimonyModule.RunningSearchStatistics, options::Options{SymbolicRegression.CoreModule.OptionsStructModule.ComplexityMapping{Int64, Int64}, DynamicExpressions.OperatorEnumModule.OperatorEnum, DynamicExpressions.NodeModule.Node, Expression, @NamedTuple{}, true, true, nothing, Nothing}, record::Dict{String, Any})
            @ SymbolicRegression.RegularizedEvolutionModule C:\Users\georg\.julia\packages\SymbolicRegression\TMsdA\src\RegularizedEvolution.jl:36
         [14] s_r_cycle(dataset::Dataset{Float32, Float32, Matrix{Float32}, Vector{Float32}, Vector{Float32}, @NamedTuple{}, Nothing, Nothing, Nothing, Nothing}, pop::Population{Float32, Float32, Expression{Float32, DynamicExpressions.NodeModule.Node{Float32}, @NamedTuple{operators::Nothing, variable_names::Nothing}}}, ncycles::Int64, curmaxsize::Int64, running_search_statistics::SymbolicRegression.AdaptiveParsimonyModule.RunningSearchStatistics; verbosity::Int64, options::Options{SymbolicRegression.CoreModule.OptionsStructModule.ComplexityMapping{Int64, Int64}, DynamicExpressions.OperatorEnumModule.OperatorEnum, DynamicExpressions.NodeModule.Node, Expression, @NamedTuple{}, true, true, nothing, Nothing}, record::Dict{String, Any})
            @ SymbolicRegression.SingleIterationModule C:\Users\georg\.julia\packages\SymbolicRegression\TMsdA\src\SingleIteration.jl:47
         [15] s_r_cycle
            @ C:\Users\georg\.julia\packages\SymbolicRegression\TMsdA\src\SingleIteration.jl:19 [inlined]
         [16] _dispatch_s_r_cycle(in_pop::Population{Float32, Float32, Expression{Float32, DynamicExpressions.NodeModule.Node{Float32}, @NamedTuple{operators::Nothing, variable_names::Nothing}}}, dataset::Dataset{Float32, Float32, Matrix{Float32}, Vector{Float32}, Vector{Float32}, @NamedTuple{}, Nothing, Nothing, Nothing, Nothing}, options::Options{SymbolicRegression.CoreModule.OptionsStructModule.ComplexityMapping{Int64, Int64}, DynamicExpressions.OperatorEnumModule.OperatorEnum, DynamicExpressions.NodeModule.Node, Expression, @NamedTuple{}, true, true, nothing, Nothing}; pop::Int64, out::Int64, iteration::Int64, verbosity::Int64, cur_maxsize::Int64, running_search_statistics::SymbolicRegression.AdaptiveParsimonyModule.RunningSearchStatistics)
            @ SymbolicRegression C:\Users\georg\.julia\packages\SymbolicRegression\TMsdA\src\SymbolicRegression.jl:1145
         [17] macro expansion
            @ C:\Users\georg\.julia\packages\SymbolicRegression\TMsdA\src\SymbolicRegression.jl:847 [inlined]
         [18] (::SymbolicRegression.var"#60#62"{Float32, Expression{Float32, DynamicExpressions.NodeModule.Node{Float32}, @NamedTuple{operators::Nothing, variable_names::Nothing}}, Float32, SymbolicRegression.SearchUtilsModule.RuntimeOptions{:multithreading, 1, true}, Options{SymbolicRegression.CoreModule.OptionsStructModule.ComplexityMapping{Int64, Int64}, DynamicExpressions.OperatorEnumModule.OperatorEnum, DynamicExpressions.NodeModule.Node, Expression, @NamedTuple{}, true, true, nothing, Nothing}, Int64, Task, SymbolicRegression.AdaptiveParsimonyModule.RunningSearchStatistics, Int64, Dataset{Float32, Float32, Matrix{Float32}, Vector{Float32}, Vector{Float32}, @NamedTuple{}, Nothing, Nothing, Nothing, Nothing}, Int64})()
            @ SymbolicRegression C:\Users\georg\.julia\packages\SymbolicRegression\TMsdA\src\SearchUtils.jl:121

Extra Info

No response

Thanks for the report. It looks like the key part of the error is

TypeError: in typeassert, expected Tuple{Vector{Float32}, SubArray{Float32, 2, Matrix{Float32}, Tuple{Base.Slice{Base.OneTo{Int64}}, Vector{Int64}}, false}, Bool}, got a value of type Tuple{Vector{Float32}, LinearAlgebra.Adjoint{Float32, Matrix{Float32}}, Bool}

As a temporary workaround I think you can replace

view(dataset.X, :, idx)

With

dataset.X[:, idx]

The fault looks to be a bug so I’ll try to fix it for 1.0.0.

Also note that even when batching is true, SR.jl will sometimes still perform a full evaluation with idx == nothing — for things like comparing expressions in the hall of fame — since otherwise an expression might "get lucky" on an easier batch and be recorded as the best expression seen.

Thank you, the workaround worked. I did notice the full dataset size n occasionally appearing in the println output (i.e. the idx == nothing case) when batching was on. Cool to see it working :)