JuliaGPU/XLA.jl

ProcessExitedException() when calling train.minimize

NTimmons opened this issue · 1 comment

I am unable to run the example code from TensorFlow.jl in the Colab environment: calling train.minimize() always throws a ProcessExitedException().

Input Code:

using TensorFlow
using MLDatasets
import Random

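# Cycles through the 60,000 MNIST training images in a shuffled order.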
mutable struct DataLoader
    cur_id::Int
    order::Vector{Int}
end

DataLoader() = DataLoader(1, Random.shuffle(1:60000))

function next_batch(loader::DataLoader, batch_size)
    x = zeros(Float32, batch_size, 784)
    y = zeros(Float32, batch_size, 10)
    for i in 1:batch_size
        data, label = MLDatasets.MNIST.traindata(loader.order[loader.cur_id])
        x[i, :] = reshape(data, (28*28))
        y[i, Int(label)+1] = 1.0
        loader.cur_id += 1
        if loader.cur_id > 60000
            loader.cur_id = 1
        end
    end
    x, y
end

function load_test_set(N=10000)
    x = zeros(Float32, N, 784)
    y = zeros(Float32, N, 10)
    for i in 1:N
        data, label = MLDatasets.MNIST.testdata(i)
        x[i, :] = reshape(data, (28*28))
        y[i, Int(label)+1] = 1.0
    end
    x, y
end

loader = DataLoader()

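# Connect to the Colab TPU runtime over gRPC (COLAB_TPU_ADDR is set by Colab).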
sess = Session(Graph(); target="grpc://"*ENV["COLAB_TPU_ADDR"])

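# Multinomial logistic regression model: y = softmax(x*W + b).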
x = placeholder(Float32)
y_ = placeholder(Float32)

W = Variable(zeros(Float32, 784, 10))
b = Variable(zeros(Float32, 10))

run(sess, global_variables_initializer())

y = nn.softmax(x*W + b)

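# Cross-entropy loss; TensorFlow.jl uses 1-based axes, so axis=[2] sums over the class dimension.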
cross_entropy = reduce_mean(-reduce_sum(y_ .* log(y), axis=[2]))
train_step = train.minimize(train.GradientDescentOptimizer(.00001), cross_entropy)

correct_prediction = argmax(y, 2) .== argmax(y_, 2)
accuracy = reduce_mean(cast(correct_prediction, Float32))

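# Train for 1000 steps on mini-batches of 100 images.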
for i in 1:1000
    batch = next_batch(loader, 100)
    run(sess, train_step, Dict(x=>batch[1], y_=>batch[2]))
end

testx, testy = load_test_set()

println(run(sess, accuracy, Dict(x=>testx, y_=>testy)))

Output:

┌ Info: Precompiling MLDatasets [eb30cadb-4394-5ae3-aed4-317e484a6458]
└ @ Base loading.jl:1186
┌ Warning: Module Compat with build ID 1186987443319 is missing from the cache.
│ This may mean Compat [34da2185-b29b-5c13-b0c7-acf172513d20] does not support precompilation but is imported by a module that does.
└ @ Base loading.jl:947
┌ Warning: Module Compat with build ID 1186987443319 is missing from the cache.
│ This may mean Compat [34da2185-b29b-5c13-b0c7-acf172513d20] does not support precompilation but is imported by a module that does.
└ @ Base loading.jl:947
┌ Info: Precompiling BinDeps [9e28174c-4ba2-5203-b857-d8d62c4213ee]
└ @ Base loading.jl:1186
┌ Warning: Module Compat with build ID 1186987443319 is missing from the cache.
│ This may mean Compat [34da2185-b29b-5c13-b0c7-acf172513d20] does not support precompilation but is imported by a module that does.
└ @ Base loading.jl:947
┌ Info: Precompiling URIParser [30578b45-9adc-5946-b283-645ec420af67]
└ @ Base loading.jl:1186
┌ Info: Precompiling DataDeps [124859b0-ceae-595e-8997-d05f6a7a8dfe]
└ @ Base loading.jl:1186
┌ Info: Precompiling GZip [92fee26a-97fe-5a0c-ad85-20a5f3185b63]
└ @ Base loading.jl:1186
ProcessExitedException()

Stacktrace:
 [1] worker_from_id(::Distributed.ProcessGroup, ::Int64) at /workspace/srcdir/julia/usr/share/julia/stdlib/v1.1/Distributed/src/cluster.jl:975
 [2] worker_from_id at /workspace/srcdir/julia/usr/share/julia/stdlib/v1.1/Distributed/src/cluster.jl:972 [inlined]
 [3] #remotecall_wait#157(::Base.Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}}, ::Function, ::Function, ::Int64) at /workspace/srcdir/julia/usr/share/julia/stdlib/v1.1/Distributed/src/remotecall.jl:433
 [4] remotecall_wait(::Function, ::Int64) at /workspace/srcdir/julia/usr/share/julia/stdlib/v1.1/Distributed/src/remotecall.jl:433
 [5] top-level scope at /root/.julia/packages/TensorFlow/YWnga/src/TensorFlow.jl:187
 [6] eval at ./boot.jl:319 [inlined]
 [7] eval at ./sysimg.jl:68 [inlined]
 [8] add_gradients_py(::Tensor{Float32}, ::Array{Any,1}, ::Nothing) at /root/.julia/packages/TensorFlow/YWnga/src/core.jl:1548
 [9] gradients at /root/.julia/packages/TensorFlow/YWnga/src/core.jl:1536 [inlined] (repeats 2 times)
 [10] compute_gradients(::TensorFlow.train.GradientDescentOptimizer, ::Tensor{Float32}, ::Nothing) at /root/.julia/packages/TensorFlow/YWnga/src/train.jl:49
 [11] #minimize#1(::Nothing, ::Nothing, ::Nothing, ::Function, ::TensorFlow.train.GradientDescentOptimizer, ::Tensor{Float32}) at /root/.julia/packages/TensorFlow/YWnga/src/train.jl:41
 [12] minimize(::TensorFlow.train.GradientDescentOptimizer, ::Tensor{Float32}) at /root/.julia/packages/TensorFlow/YWnga/src/train.jl:38
 [13] top-level scope at In[14]:54
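
The trace shows the failure inside TensorFlow.jl's gradient machinery: add_gradients_py goes through Distributed's remotecall_wait, i.e. the gradient is computed on a separate worker process, and ProcessExitedException() is what Distributed throws when the target worker has already died. A minimal sketch, independent of TensorFlow.jl, that reproduces the same exception:

using Distributed

p = addprocs(1)[1]           # spawn one worker process
rmprocs(p)                   # remove it again, so pid p no longer exists
remotecall_wait(() -> 1, p)  # throws ProcessExitedException(), as in the trace above
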
Keno commented

I'd recommend filing this at TensorFlow.jl, since this package uses TF as basically a glorified gRPC interface and not much more.
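
For context, the gRPC usage in question is just the session target from the report above. A minimal sketch, assuming the same Colab TPU runtime, that exercises only that transport (no gradients, so train.minimize is never reached):

using TensorFlow

sess = Session(Graph(); target="grpc://" * ENV["COLAB_TPU_ADDR"])
a = constant(Float32[1, 2, 3])
println(run(sess, a + a))  # if this returns [2.0, 4.0, 6.0], the gRPC transport itself is fine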