Session 3 vae.VAE decoding layers don't use transposed Ws from encoding layers
bordakov commented
The Session 3 vae.VAE code for the decoding layers is:
```python
shapes.reverse()
n_filters.reverse()
Ws.reverse()

n_filters += [input_shape[-1]]

# %%
# Decoding layers
for layer_i, n_output in enumerate(n_filters[1:]):
    with tf.variable_scope('decoder/{}'.format(layer_i)):
        shape = shapes[layer_i + 1]
        if convolutional:
            h, W = utils.deconv2d(x=current_input,
                                  n_output_h=shape[1],
                                  n_output_w=shape[2],
                                  n_output_ch=shape[3],
                                  n_input_ch=shapes[layer_i][3],
                                  k_h=filter_sizes[layer_i],
                                  k_w=filter_sizes[layer_i])
        else:
            h, W = utils.linear(x=current_input,
                                n_output=n_output)
        h = activation(batch_norm(h, phase_train, 'dec/bn' + str(layer_i)))
        if dropout:
            h = tf.nn.dropout(h, keep_prob)
        current_input = h

y = current_input
x_flat = utils.flatten(x)
y_flat = utils.flatten(y)

# l2 loss
loss_x = tf.reduce_sum(tf.squared_difference(x_flat, y_flat), 1)
```
This code seems to create new variables inside utils.deconv2d and utils.linear:
```python
def linear(x, n_output, name=None, activation=None, reuse=None):
    """Fully connected layer.

    Parameters
    ----------
    x : tf.Tensor
        Input tensor to connect
    n_output : int
        Number of output neurons
    name : None, optional
        Scope to apply

    Returns
    -------
    h, W : tf.Tensor, tf.Tensor
        Output of fully connected layer and the weight matrix
    """
    if len(x.get_shape()) != 2:
        x = flatten(x, reuse=reuse)

    n_input = x.get_shape().as_list()[1]

    with tf.variable_scope(name or "fc", reuse=reuse):
        W = tf.get_variable(
            name='W',
            shape=[n_input, n_output],
            dtype=tf.float32,
            initializer=tf.contrib.layers.xavier_initializer())

        b = tf.get_variable(
            name='b',
            shape=[n_output],
            dtype=tf.float32,
            initializer=tf.constant_initializer(0.0))

        h = tf.nn.bias_add(
            name='h',
            value=tf.matmul(x, W),
            bias=b)

        if activation:
            h = activation(h)

        return h, W
```
rather than using the transposed Ws from the encoding layers, as in the Session 3 lecture:
```python
for layer_i, n_output in enumerate(dimensions):
    # we'll use a variable scope again to help encapsulate our variables
    # This will simply prefix all the variables made in this scope
    # with the name we give it.
    with tf.variable_scope("decoder/layer/{}".format(layer_i)):

        # Now we'll grab the weight matrix we created before and transpose it
        # So a 3072 x 784 matrix would become 784 x 3072
        # or a 256 x 64 matrix, would become 64 x 256
        W = tf.transpose(Ws[layer_i])

        b = tf.get_variable(
            name='b',
            shape=[n_output],
            dtype=tf.float32,
            initializer=tf.constant_initializer(0.0))

        # Now we'll multiply our input by our transposed W matrix
        # and add the bias
        h = tf.nn.bias_add(
            name='h',
            value=tf.matmul(current_input, W),
            bias=b)

        # And then use a relu activation function on its output
        current_input = tf.nn.relu(h)

        # We'll also replace n_input with the current n_output, so that on the
        # next iteration, our new number inputs will be correct.
        n_input = n_output
```
Why? I am a rookie and don't understand. Please explain.
pkmital commented
You can add the transposed layers as a regularization method, or leave the network to learn entirely new filters. That is up to you, and it is worth exploring both options to see how each affects the result! Good luck!
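To make the two options concrete, here is a minimal sketch of a fully connected autoencoder with both kinds of decoder side by side: one tied to the encoder via `tf.transpose` of the stored Ws, and one creating brand-new variables the way `utils.linear` does. This is a toy illustration, not the course code; the layer sizes, initializers, and scope names are invented for the example.

```python
# Toy sketch (not the course code): tied vs. untied fully connected decoders.
import tensorflow as tf

n_input = 784
dimensions = [256, 64]

x = tf.placeholder(tf.float32, [None, n_input], name='x')

# --- Encoder: store each layer's W so it can be reused later ---
current_input = x
Ws = []
n_prev = n_input
for layer_i, n_output in enumerate(dimensions):
    with tf.variable_scope('encoder/{}'.format(layer_i)):
        W = tf.get_variable('W', [n_prev, n_output],
                            initializer=tf.random_normal_initializer(stddev=0.02))
        b = tf.get_variable('b', [n_output],
                            initializer=tf.constant_initializer(0.0))
        current_input = tf.nn.relu(tf.matmul(current_input, W) + b)
        Ws.append(W)
        n_prev = n_output

z = current_input

# Decoder output sizes, mirroring the encoder back to the input size
Ws_rev = Ws[::-1]
decoder_dims = dimensions[::-1][1:] + [n_input]   # e.g. [256, 784]

# --- Option 1: tied decoder, reuses the encoder weights transposed ---
current_input = z
for layer_i, n_output in enumerate(decoder_dims):
    with tf.variable_scope('decoder_tied/{}'.format(layer_i)):
        W = tf.transpose(Ws_rev[layer_i])          # no new weight matrix
        b = tf.get_variable('b', [n_output],
                            initializer=tf.constant_initializer(0.0))
        current_input = tf.nn.relu(tf.matmul(current_input, W) + b)
y_tied = current_input

# --- Option 2: untied decoder, learns entirely new filters ---
current_input = z
n_prev = dimensions[-1]
for layer_i, n_output in enumerate(decoder_dims):
    with tf.variable_scope('decoder_untied/{}'.format(layer_i)):
        W = tf.get_variable('W', [n_prev, n_output],
                            initializer=tf.random_normal_initializer(stddev=0.02))
        b = tf.get_variable('b', [n_output],
                            initializer=tf.constant_initializer(0.0))
        current_input = tf.nn.relu(tf.matmul(current_input, W) + b)
        n_prev = n_output
y_untied = current_input
```

Tying the weights roughly halves the decoder's parameter count and forces it to mirror the encoder, which is the regularization effect mentioned above; the untied decoder has more capacity but must learn its filters from scratch, which is what vae.VAE does.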