timsainb/Tensorflow-MultiGPU-VAE-GAN

apply_gradients doesn't work

Closed this issue · 3 comments

Hi,

I am interested in your work, so I downloaded your code and tried to run it, but I get an error at "apply_gradients".

I have been working on this problem for a little while, but I cannot fix the error.

Here is the error message:

ValueError Traceback (most recent call last)
in ()
6
7 # apply the gradients with our optimizers
----> 8 train_E = opt_E.apply_gradients(grads_e, global_step=global_step)
9 train_G = opt_G.apply_gradients(grads_g, global_step=global_step)
10 train_D = opt_D.apply_gradients(grads_d, global_step=global_step)

/home/junsuk/anaconda3/lib/python3.6/site-packages/tensorflow/python/training/optimizer.py in apply_gradients(self, grads_and_vars, global_step, name)
444 ([str(v) for _, _, v in converted_grads_and_vars],))
445 with ops.control_dependencies(None):
--> 446 self._create_slots([_get_variable_for(v) for v in var_list])
447 update_ops = []
448 with ops.name_scope(name, self._name) as name:

/home/junsuk/anaconda3/lib/python3.6/site-packages/tensorflow/python/training/adam.py in _create_slots(self, var_list)
120 # Create slots for the first and second moments.
121 for v in var_list:
--> 122 self._zeros_slot(v, "m", self._name)
123 self._zeros_slot(v, "v", self._name)
124

/home/junsuk/anaconda3/lib/python3.6/site-packages/tensorflow/python/training/optimizer.py in _zeros_slot(self, var, slot_name, op_name)
764 named_slots = self._slot_dict(slot_name)
765 if _var_key(var) not in named_slots:
--> 766 named_slots[_var_key(var)] = slot_creator.create_zeros_slot(var, op_name)
767 return named_slots[_var_key(var)]

/home/junsuk/anaconda3/lib/python3.6/site-packages/tensorflow/python/training/slot_creator.py in create_zeros_slot(primary, name, dtype, colocate_with_primary)
172 return create_slot_with_initializer(
173 primary, initializer, slot_shape, dtype, name,
--> 174 colocate_with_primary=colocate_with_primary)
175 else:
176 val = array_ops.zeros(slot_shape, dtype=dtype)

/home/junsuk/anaconda3/lib/python3.6/site-packages/tensorflow/python/training/slot_creator.py in create_slot_with_initializer(primary, initializer, shape, dtype, name, colocate_with_primary)
144 with ops.colocate_with(primary):
145 return _create_slot_var(primary, initializer, "", validate_shape, shape,
--> 146 dtype)
147 else:
148 return _create_slot_var(primary, initializer, "", validate_shape, shape,

/home/junsuk/anaconda3/lib/python3.6/site-packages/tensorflow/python/training/slot_creator.py in _create_slot_var(primary, val, scope, validate_shape, shape, dtype)
64 use_resource=_is_resource(primary),
65 shape=shape, dtype=dtype,
---> 66 validate_shape=validate_shape)
67 variable_scope.get_variable_scope().set_partitioner(current_partitioner)
68

/home/junsuk/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/variable_scope.py in get_variable(name, shape, dtype, initializer, regularizer, trainable, collections, caching_device, partitioner, validate_shape, use_resource, custom_getter)
1047 collections=collections, caching_device=caching_device,
1048 partitioner=partitioner, validate_shape=validate_shape,
-> 1049 use_resource=use_resource, custom_getter=custom_getter)
1050 get_variable_or_local_docstring = (
1051 """%s

/home/junsuk/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/variable_scope.py in get_variable(self, var_store, name, shape, dtype, initializer, regularizer, trainable, collections, caching_device, partitioner, validate_shape, use_resource, custom_getter)
946 collections=collections, caching_device=caching_device,
947 partitioner=partitioner, validate_shape=validate_shape,
--> 948 use_resource=use_resource, custom_getter=custom_getter)
949
950 def _get_partitioned_variable(self,

/home/junsuk/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/variable_scope.py in get_variable(self, name, shape, dtype, initializer, regularizer, reuse, trainable, collections, caching_device, partitioner, validate_shape, use_resource, custom_getter)
354 reuse=reuse, trainable=trainable, collections=collections,
355 caching_device=caching_device, partitioner=partitioner,
--> 356 validate_shape=validate_shape, use_resource=use_resource)
357
358 def _get_partitioned_variable(

/home/junsuk/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/variable_scope.py in _true_getter(name, shape, dtype, initializer, regularizer, reuse, trainable, collections, caching_device, partitioner, validate_shape, use_resource)
339 trainable=trainable, collections=collections,
340 caching_device=caching_device, validate_shape=validate_shape,
--> 341 use_resource=use_resource)
342
343 if custom_getter is not None:

/home/junsuk/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/variable_scope.py in _get_single_variable(self, name, shape, dtype, initializer, regularizer, partition_info, reuse, trainable, collections, caching_device, validate_shape, use_resource)
669 raise ValueError("Variable %s does not exist, or was not created with "
670 "tf.get_variable(). Did you mean to set reuse=None in "
--> 671 "VarScope?" % name)
672 if not shape.is_fully_defined() and not initializing_from_value:
673 raise ValueError("Shape of a new variable (%s) must be fully defined, "

ValueError: Variable enc/conv2d/weights/Adam/ does not exist, or was not created with tf.get_variable(). Did you mean to set reuse=None in VarScope?

Do you have any idea what is causing this error?

I'm sorry for my bad English.

P.S. I am using TensorFlow r1.1, prettytensor 0.7.4, and Python 3.6.

@gildorgood I have the same problem. Have you managed to fix it?

@Engineering-Course Yes, I have solved this problem.
The error is due to a change in newer TensorFlow versions: the Adam optimizer now creates its slot variables through tf.get_variable (as the traceback shows), so once tf.get_variable_scope().reuse_variables() has been called on the root scope, apply_gradients cannot create enc/conv2d/weights/Adam because a reused scope requires the variable to already exist. The fix is to wrap the tower loop in an inner tf.variable_scope(tf.get_variable_scope()) so the reuse flag stays local to that scope.
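
For reference, here is a minimal, self-contained sketch of the pattern (illustrative names only, not the notebook's actual code), showing where the reuse flag has to live for apply_gradients to work:

import tensorflow as tf

def build(x):
    # stand-in for the encoder/generator/discriminator networks
    with tf.variable_scope('enc'):
        w = tf.get_variable('w', [8, 4])
    return tf.matmul(x, w)

x = tf.placeholder(tf.float32, [None, 8])

# Confine reuse to an inner scope so the root scope stays writable.
with tf.variable_scope(tf.get_variable_scope()):
    y0 = build(x)                              # first tower creates the variables
    tf.get_variable_scope().reuse_variables()  # reuse only applies inside this block
    y1 = build(x)                              # later towers share the same variables

loss = tf.reduce_mean(y0 + y1)
opt = tf.train.AdamOptimizer(1e-3)
# Back at the root scope reuse is still off, so Adam can create its slot
# variables (e.g. enc/w/Adam) and apply_gradients no longer raises ValueError.
train_op = opt.apply_gradients(opt.compute_gradients(loss))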

You can train the network using this modified version:

with graph.as_default():
    # Define the network for each GPU
    all_input = tf.placeholder(tf.float32, [batch_size*num_gpus, dim1*dim2*dim3])
    with tf.variable_scope(tf.get_variable_scope()):
        KL_param = tf.placeholder(tf.float32)
        LL_param = tf.placeholder(tf.float32)
        G_param = tf.placeholder(tf.float32)

        for i in range(num_gpus):
            with tf.device('/gpu:%d' % i):
                with tf.name_scope('Tower_%d' % i) as scope:
                    # grab this portion of the input
                    next_batch = all_input[i*batch_size:(i+1)*batch_size, :]

                    # Construct the model
                    z_x_mean, z_x_log_sigma_sq, z_x, x_tilde, l_x_tilde, x_p, d_x, l_x, d_x_p, z_p = inference(next_batch)

                    # Calculate the loss for this tower
                    SSE_loss, KL_loss, D_loss, G_loss, LL_loss = loss(next_batch, x_tilde, z_x_log_sigma_sq, z_x_mean, d_x, d_x_p, l_x, l_x_tilde, dim1, dim2, dim3)

                    # Split the trainable variables into encoder, generator and discriminator parameters
                    params = tf.trainable_variables()
                    E_params = [v for v in params if 'enc' in v.name]
                    G_params = [v for v in params if 'gen' in v.name]
                    D_params = [v for v in params if 'dis' in v.name]

                    # Calculate the losses specific to encoder, generator, discriminator
                    L_e = tf.clip_by_value(KL_loss*KL_param + LL_loss, -100, 100)
                    L_g = tf.clip_by_value(LL_loss*LL_param + G_loss*G_param, -100, 100)
                    L_d = tf.clip_by_value(D_loss, -100, 100)

                    # Reuse variables for the next tower. The reuse flag now stays
                    # inside the tf.variable_scope opened above instead of leaking
                    # into the root scope where apply_gradients runs.
                    tf.get_variable_scope().reuse_variables()

                    # Calculate the gradients for the batch of data on this tower.
                    grads_e = opt_E.compute_gradients(L_e, var_list=E_params)
                    grads_g = opt_G.compute_gradients(L_g, var_list=G_params)
                    grads_d = opt_D.compute_gradients(L_d, var_list=D_params)

                    # Keep track of the gradients across all towers.
                    tower_grads_e.append(grads_e)
                    tower_grads_g.append(grads_g)
                    tower_grads_d.append(grads_d)
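
After that, the cell that originally raised the ValueError runs as-is, because apply_gradients now executes outside the inner variable scope, where reuse is not set. A minimal sketch of that step, assuming an average_gradients helper in the style of the TensorFlow CIFAR-10 multi-GPU example (hypothetical here; the notebook's own helper may differ):

def average_gradients(tower_grads):
    # tower_grads is a list (one entry per GPU) of lists of (grad, var) pairs
    average_grads = []
    for grad_and_vars in zip(*tower_grads):
        grads = [tf.expand_dims(g, 0) for g, _ in grad_and_vars]
        grad = tf.reduce_mean(tf.concat(grads, 0), 0)
        average_grads.append((grad, grad_and_vars[0][1]))
    return average_grads

with graph.as_default():
    # average the per-tower gradients collected above
    grads_e = average_gradients(tower_grads_e)
    grads_g = average_gradients(tower_grads_g)
    grads_d = average_gradients(tower_grads_d)

    # apply the gradients with our optimizers; because this runs outside the
    # tf.variable_scope block, the root scope is not in reuse mode and Adam
    # can create its slot variables here without the ValueError.
    train_E = opt_E.apply_gradients(grads_e, global_step=global_step)
    train_G = opt_G.apply_gradients(grads_g, global_step=global_step)
    train_D = opt_D.apply_gradients(grads_d, global_step=global_step)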

@gildorgood Thanks a lot.