[Feature Request] Addition of new operation to Tensorflow Lite for "ENet"
PINTO0309 opened this issue · 1 comment
System information
- TensorFlow version (you are using): TensorFlow Lite v1.11.0 (self-built)
- Hardware: RaspberryPi3
- OS: Raspbian Stretch
- Are you willing to contribute it (Yes/No): No (because there are few Custom Operation tutorials, and I cannot write C++ programs.)
Describe the feature and the current behavior/state.
I cannot implement the semantic segmentation model "ENet" unless TensorFlow Lite supports the various operations used by the unpooling layer.
Layers / operations that I would like to see supported in TensorFlow Lite (a short tf.scatter_nd sketch follows this list):
- FloorMod
- Range
- Rank
- Abs
- MaxPoolWithArgmax
- ScatterNd
- SparseTensor
- sparse_add
- gather_nd
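Most of these operations come from the unpool implementation shown under 【Reference】 below. For example, tf.scatter_nd is what writes the pooled values back to their pre-pooling positions; a minimal sketch (indices and values here are made up for illustration):

import tensorflow as tf

indices = tf.constant([[1], [6]])           # positions recovered from the pooling argmax
updates = tf.constant([9.0, 5.0])           # the pooled values
restored = tf.scatter_nd(indices, updates, shape=[8])
with tf.Session() as sess:
    print(sess.run(restored))               # [0. 9. 0. 0. 0. 0. 5. 0.]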
Sample message of the unsupported-operation error
Traceback (most recent call last):
File "main.py", line 5, in <module>
interpreter = tf.contrib.lite.Interpreter(model_path="semanticsegmentation_enet_non_quantized.tflite")
File "/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/lite/python/interpreter.py", line 53, in __init__
model_path))
ValueError: Didn't find custom op for name 'FloorMod' with version 1
Didn't find custom op for name 'Range' with version 1
Didn't find custom op for name 'Rank' with version 1
Didn't find custom op for name 'Abs' with version 1
Didn't find custom op for name 'MaxPoolWithArgmax' with version 1
Didn't find custom op for name 'ScatterNd' with version 1
Registration failed.
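For reference, the .tflite file above was produced along these lines. This is a minimal sketch: the frozen-graph path and tensor names are placeholders, and I am assuming the TF 1.11 tf.contrib.lite.TocoConverter API.

import tensorflow as tf

converter = tf.contrib.lite.TocoConverter.from_frozen_graph(
    'frozen_enet.pb',                             # placeholder path to the frozen GraphDef
    input_arrays=['input'],                       # placeholder input tensor name
    output_arrays=['ENet/logits_to_softmax'])     # placeholder output tensor name
converter.allow_custom_ops = True  # the ops listed above are emitted as custom ops
open('semanticsegmentation_enet_non_quantized.tflite', 'wb').write(converter.convert())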
Will this change the current API? How?
Yes. These custom operations must be officially implemented.
Who will benefit from this feature?
- Engineers studying autonomous-driving technology in general
- Engineers studying fast inference on lightweight mobile devices
- Engineers studying lightweight models
Any other info.
My repositories and sample programs are below (for Python 2.x / Python 3.x).
https://github.com/PINTO0309/TensorFlow-ENet.git
https://github.com/PINTO0309/TensorflowLite-UNet.git
【Reference】 Model Logic
import tensorflow as tf
from tensorflow.contrib.layers.python.layers import initializers
slim = tf.contrib.slim
@slim.add_arg_scope
def prelu(x, scope, decoder=False):
#If decoder, then perform relu and just return the output
if decoder:
return tf.nn.relu(x, name=scope)
    alpha = tf.get_variable(scope + 'alpha', x.get_shape()[-1],
initializer=tf.constant_initializer(0.0),
dtype=tf.float32)
pos = tf.nn.relu(x)
neg = alpha * (x - abs(x)) * 0.5
return pos + neg
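# Sanity check (my own addition, not part of the model): pos + neg above is the
# standard PReLU formula, since (x - abs(x)) / 2 == min(x, 0).
def _prelu_formula_check():
    import numpy as np
    x = np.array([-2.0, -0.5, 0.0, 1.5])
    alpha = 0.25
    pos = np.maximum(x, 0.0)
    neg = alpha * (x - np.abs(x)) * 0.5
    assert np.allclose(pos + neg, np.where(x > 0.0, x, alpha * x))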
def spatial_dropout(x, p, seed, scope, is_training=True):
if is_training:
keep_prob = 1.0 - p
input_shape = x.get_shape().as_list()
noise_shape = tf.constant(value=[input_shape[0], 1, 1, input_shape[3]])
output = tf.nn.dropout(x, keep_prob, noise_shape, seed=seed, name=scope)
return output
return x
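# Note (my own addition): with noise_shape [N, 1, 1, C] the dropout mask is
# broadcast over height and width, so whole feature maps are kept or dropped
# together rather than individual pixels. A minimal NumPy sketch of the idea:
def _spatial_dropout_sketch():
    import numpy as np
    rng = np.random.RandomState(0)
    n, h, w, c, keep_prob = 2, 4, 4, 3, 0.5
    # One keep/drop decision per (batch, channel), scaled by 1 / keep_prob.
    mask = (rng.uniform(size=(n, 1, 1, c)) < keep_prob) / keep_prob
    x = rng.randn(n, h, w, c)
    return x * mask  # each channel is fully kept (scaled) or fully zeroed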
def unpool(updates, mask, k_size=[1, 2, 2, 1], output_shape=None, scope=''):
with tf.variable_scope(scope):
mask = tf.cast(mask, tf.int32)
input_shape = tf.shape(updates, out_type=tf.int32)
        # calculate the new shape
        if output_shape is None:
            output_shape = (input_shape[0], input_shape[1] * k_size[1], input_shape[2] * k_size[2], input_shape[3])
        # calculate indices for batch, height, width and feature maps
one_like_mask = tf.ones_like(mask, dtype=tf.int32)
batch_shape = tf.concat([[input_shape[0]], [1], [1], [1]], 0)
batch_range = tf.reshape(tf.range(output_shape[0], dtype=tf.int32), shape=batch_shape)
b = one_like_mask * batch_range
y = mask // (output_shape[2] * output_shape[3])
x = (mask // output_shape[3]) % output_shape[2] #mask % (output_shape[2] * output_shape[3]) // output_shape[3]
feature_range = tf.range(output_shape[3], dtype=tf.int32)
f = one_like_mask * feature_range
# transpose indices & reshape update values to one dimension
updates_size = tf.size(updates)
indices = tf.transpose(tf.reshape(tf.stack([b, y, x, f]), [4, updates_size]))
values = tf.reshape(updates, [updates_size])
ret = tf.scatter_nd(indices, values, output_shape)
return ret
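# Note (my own addition): the index arithmetic above relies on
# tf.nn.max_pool_with_argmax flattening each position within a batch element
# as (y * width + x) * channels + c. A minimal check of that decomposition:
def _unpool_index_check():
    H, W, C = 4, 6, 3           # shape of the tensor before pooling
    y, x, c = 2, 5, 1           # an arbitrary position in that tensor
    mask = (y * W + x) * C + c  # flattened argmax index, TF-style
    assert mask // (W * C) == y       # y = mask // (output_shape[2] * output_shape[3])
    assert (mask // C) % W == x       # x = (mask // output_shape[3]) % output_shape[2]
    assert mask % C == c              # channel index, taken from feature_range above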
@slim.add_arg_scope
def initial_block(inputs, is_training=True, scope='initial_block'):
#Convolutional branch
net_conv = slim.conv2d(inputs, 13, [3,3], stride=2, activation_fn=None, scope=scope+'_conv')
net_conv = slim.batch_norm(net_conv, is_training=is_training, fused=True, scope=scope+'_batchnorm')
net_conv = prelu(net_conv, scope=scope+'_prelu')
#Max pool branch
net_pool = slim.max_pool2d(inputs, [2,2], stride=2, scope=scope+'_max_pool')
#Concatenated output - does it matter max pool comes first or conv comes first? probably not.
net_concatenated = tf.concat([net_conv, net_pool], axis=3, name=scope+'_concat')
return net_concatenated
@slim.add_arg_scope
def bottleneck(inputs,
output_depth,
filter_size,
regularizer_prob,
projection_ratio=4,
seed=0,
is_training=True,
downsampling=False,
upsampling=False,
pooling_indices=None,
output_shape=None,
dilated=False,
dilation_rate=None,
asymmetric=False,
decoder=False,
scope='bottleneck'):
#Calculate the depth reduction based on the projection ratio used in 1x1 convolution.
reduced_depth = int(inputs.get_shape().as_list()[3] / projection_ratio)
with slim.arg_scope([prelu], decoder=decoder):
#=============DOWNSAMPLING BOTTLENECK====================
if downsampling:
#=============MAIN BRANCH=============
#Just perform a max pooling
net_main, pooling_indices = tf.nn.max_pool_with_argmax(inputs,
ksize=[1,2,2,1],
strides=[1,2,2,1],
padding='SAME',
name=scope+'_main_max_pool')
#First get the difference in depth to pad, then pad with zeros only on the last dimension.
inputs_shape = inputs.get_shape().as_list()
depth_to_pad = abs(inputs_shape[3] - output_depth)
paddings = tf.convert_to_tensor([[0,0], [0,0], [0,0], [0, depth_to_pad]])
net_main = tf.pad(net_main, paddings=paddings, name=scope+'_main_padding')
#=============SUB BRANCH==============
#First projection that has a 2x2 kernel and stride 2
net = slim.conv2d(inputs, reduced_depth, [2,2], stride=2, scope=scope+'_conv1')
net = slim.batch_norm(net, is_training=is_training, scope=scope+'_batch_norm1')
net = prelu(net, scope=scope+'_prelu1')
#Second conv block
net = slim.conv2d(net, reduced_depth, [filter_size, filter_size], scope=scope+'_conv2')
net = slim.batch_norm(net, is_training=is_training, scope=scope+'_batch_norm2')
net = prelu(net, scope=scope+'_prelu2')
#Final projection with 1x1 kernel
net = slim.conv2d(net, output_depth, [1,1], scope=scope+'_conv3')
net = slim.batch_norm(net, is_training=is_training, scope=scope+'_batch_norm3')
net = prelu(net, scope=scope+'_prelu3')
#Regularizer
net = spatial_dropout(net, p=regularizer_prob, seed=seed, scope=scope+'_spatial_dropout')
#Finally, combine the two branches together via an element-wise addition
net = tf.add(net, net_main, name=scope+'_add')
net = prelu(net, scope=scope+'_last_prelu')
#also return inputs shape for convenience later
return net, pooling_indices, inputs_shape
#============DILATION CONVOLUTION BOTTLENECK====================
#Everything is the same as a regular bottleneck except for the dilation rate argument
elif dilated:
#Check if dilation rate is given
if not dilation_rate:
raise ValueError('Dilation rate is not given.')
#Save the main branch for addition later
net_main = inputs
#First projection with 1x1 kernel (dimensionality reduction)
net = slim.conv2d(inputs, reduced_depth, [1,1], scope=scope+'_conv1')
net = slim.batch_norm(net, is_training=is_training, scope=scope+'_batch_norm1')
net = prelu(net, scope=scope+'_prelu1')
#Second conv block --- apply dilated convolution here
net = slim.conv2d(net, reduced_depth, [filter_size, filter_size], rate=dilation_rate, scope=scope+'_dilated_conv2')
net = slim.batch_norm(net, is_training=is_training, scope=scope+'_batch_norm2')
net = prelu(net, scope=scope+'_prelu2')
#Final projection with 1x1 kernel (Expansion)
net = slim.conv2d(net, output_depth, [1,1], scope=scope+'_conv3')
net = slim.batch_norm(net, is_training=is_training, scope=scope+'_batch_norm3')
net = prelu(net, scope=scope+'_prelu3')
#Regularizer
net = spatial_dropout(net, p=regularizer_prob, seed=seed, scope=scope+'_spatial_dropout')
net = prelu(net, scope=scope+'_prelu4')
#Add the main branch
net = tf.add(net_main, net, name=scope+'_add_dilated')
net = prelu(net, scope=scope+'_last_prelu')
return net
#===========ASYMMETRIC CONVOLUTION BOTTLENECK==============
#Everything is the same as a regular bottleneck except for a [5,5] kernel decomposed into two [5,1] then [1,5]
elif asymmetric:
#Save the main branch for addition later
net_main = inputs
#First projection with 1x1 kernel (dimensionality reduction)
net = slim.conv2d(inputs, reduced_depth, [1,1], scope=scope+'_conv1')
net = slim.batch_norm(net, is_training=is_training, scope=scope+'_batch_norm1')
net = prelu(net, scope=scope+'_prelu1')
#Second conv block --- apply asymmetric conv here
net = slim.conv2d(net, reduced_depth, [filter_size, 1], scope=scope+'_asymmetric_conv2a')
net = slim.conv2d(net, reduced_depth, [1, filter_size], scope=scope+'_asymmetric_conv2b')
net = slim.batch_norm(net, is_training=is_training, scope=scope+'_batch_norm2')
net = prelu(net, scope=scope+'_prelu2')
#Final projection with 1x1 kernel
net = slim.conv2d(net, output_depth, [1,1], scope=scope+'_conv3')
net = slim.batch_norm(net, is_training=is_training, scope=scope+'_batch_norm3')
net = prelu(net, scope=scope+'_prelu3')
#Regularizer
net = spatial_dropout(net, p=regularizer_prob, seed=seed, scope=scope+'_spatial_dropout')
net = prelu(net, scope=scope+'_prelu4')
#Add the main branch
net = tf.add(net_main, net, name=scope+'_add_asymmetric')
net = prelu(net, scope=scope+'_last_prelu')
return net
#============UPSAMPLING BOTTLENECK================
#Everything is the same as a regular one, except convolution becomes transposed.
elif upsampling:
            #Check if pooling indices are given
            if pooling_indices is None:
                raise ValueError('Pooling indices are not given.')
            #Check if output_shape is given
            if output_shape is None:
                raise ValueError('Output shape is not given.')
#=======MAIN BRANCH=======
#Main branch to upsample. output shape must match with the shape of the layer that was pooled initially, in order
#for the pooling indices to work correctly. However, the initial pooled layer was padded, so need to reduce dimension
#before unpooling. In the paper, padding is replaced with convolution for this purpose of reducing the depth!
net_unpool = slim.conv2d(inputs, output_depth, [1,1], scope=scope+'_main_conv1')
            net_unpool = slim.batch_norm(net_unpool, is_training=is_training, scope=scope+'_batch_norm1')
net_unpool = unpool(net_unpool, pooling_indices, output_shape=output_shape, scope='unpool')
#======SUB BRANCH=======
#First 1x1 projection to reduce depth
net = slim.conv2d(inputs, reduced_depth, [1,1], scope=scope+'_conv1')
net = slim.batch_norm(net, is_training=is_training, scope=scope+'_batch_norm2')
net = prelu(net, scope=scope+'_prelu1')
#Second conv block -----------------------------> NOTE: using tf.nn.conv2d_transpose for variable input shape.
net_unpool_shape = net_unpool.get_shape().as_list()
output_shape = [net_unpool_shape[0], net_unpool_shape[1], net_unpool_shape[2], reduced_depth]
output_shape = tf.convert_to_tensor(output_shape)
filter_size = [filter_size, filter_size, reduced_depth, reduced_depth]
filters = tf.get_variable(shape=filter_size, initializer=initializers.xavier_initializer(), dtype=tf.float32, name=scope+'_transposed_conv2_filters')
# net = slim.conv2d_transpose(net, reduced_depth, [filter_size, filter_size], stride=2, scope=scope+'_transposed_conv2')
net = tf.nn.conv2d_transpose(net, filter=filters, strides=[1,2,2,1], output_shape=output_shape, name=scope+'_transposed_conv2')
net = slim.batch_norm(net, is_training=is_training, scope=scope+'_batch_norm3')
net = prelu(net, scope=scope+'_prelu2')
#Final projection with 1x1 kernel
net = slim.conv2d(net, output_depth, [1,1], scope=scope+'_conv3')
net = slim.batch_norm(net, is_training=is_training, scope=scope+'_batch_norm4')
net = prelu(net, scope=scope+'_prelu3')
#Regularizer
net = spatial_dropout(net, p=regularizer_prob, seed=seed, scope=scope+'_spatial_dropout')
net = prelu(net, scope=scope+'_prelu4')
#Finally, add the unpooling layer and the sub branch together
net = tf.add(net, net_unpool, name=scope+'_add_upsample')
net = prelu(net, scope=scope+'_last_prelu')
return net
#OTHERWISE, just perform a regular bottleneck!
#==============REGULAR BOTTLENECK==================
#Save the main branch for addition later
net_main = inputs
#First projection with 1x1 kernel
net = slim.conv2d(inputs, reduced_depth, [1,1], scope=scope+'_conv1')
net = slim.batch_norm(net, is_training=is_training, scope=scope+'_batch_norm1')
net = prelu(net, scope=scope+'_prelu1')
#Second conv block
net = slim.conv2d(net, reduced_depth, [filter_size, filter_size], scope=scope+'_conv2')
net = slim.batch_norm(net, is_training=is_training, scope=scope+'_batch_norm2')
net = prelu(net, scope=scope+'_prelu2')
#Final projection with 1x1 kernel
net = slim.conv2d(net, output_depth, [1,1], scope=scope+'_conv3')
net = slim.batch_norm(net, is_training=is_training, scope=scope+'_batch_norm3')
net = prelu(net, scope=scope+'_prelu3')
#Regularizer
net = spatial_dropout(net, p=regularizer_prob, seed=seed, scope=scope+'_spatial_dropout')
net = prelu(net, scope=scope+'_prelu4')
#Add the main branch
net = tf.add(net_main, net, name=scope+'_add_regular')
net = prelu(net, scope=scope+'_last_prelu')
return net
#Now actually start building the network
def ENet(inputs,
num_classes,
batch_size,
num_initial_blocks=1,
stage_two_repeat=2,
skip_connections=True,
reuse=None,
is_training=True,
scope='ENet'):
#Set the shape of the inputs first to get the batch_size information
inputs_shape = inputs.get_shape().as_list()
inputs.set_shape(shape=(batch_size, inputs_shape[1], inputs_shape[2], inputs_shape[3]))
with tf.variable_scope(scope, reuse=reuse):
#Set the primary arg scopes. Fused batch_norm is faster than normal batch norm.
with slim.arg_scope([initial_block, bottleneck], is_training=is_training),\
slim.arg_scope([slim.batch_norm], fused=True), \
slim.arg_scope([slim.conv2d, slim.conv2d_transpose], activation_fn=None):
#=================INITIAL BLOCK=================
net = initial_block(inputs, scope='initial_block_1')
            for i in range(2, max(num_initial_blocks, 1) + 1):
net = initial_block(net, scope='initial_block_' + str(i))
#Save for skip connection later
if skip_connections:
net_one = net
#===================STAGE ONE=======================
net, pooling_indices_1, inputs_shape_1 = bottleneck(net, output_depth=64, filter_size=3, regularizer_prob=0.01, downsampling=True, scope='bottleneck1_0')
net = bottleneck(net, output_depth=64, filter_size=3, regularizer_prob=0.01, scope='bottleneck1_1')
net = bottleneck(net, output_depth=64, filter_size=3, regularizer_prob=0.01, scope='bottleneck1_2')
net = bottleneck(net, output_depth=64, filter_size=3, regularizer_prob=0.01, scope='bottleneck1_3')
net = bottleneck(net, output_depth=64, filter_size=3, regularizer_prob=0.01, scope='bottleneck1_4')
#Save for skip connection later
if skip_connections:
net_two = net
#regularization prob is 0.1 from bottleneck 2.0 onwards
with slim.arg_scope([bottleneck], regularizer_prob=0.1):
net, pooling_indices_2, inputs_shape_2 = bottleneck(net, output_depth=128, filter_size=3, downsampling=True, scope='bottleneck2_0')
#Repeat the stage two at least twice to get stage 2 and 3:
                for i in range(2, max(stage_two_repeat, 2) + 2):
net = bottleneck(net, output_depth=128, filter_size=3, scope='bottleneck'+str(i)+'_1')
net = bottleneck(net, output_depth=128, filter_size=3, dilated=True, dilation_rate=2, scope='bottleneck'+str(i)+'_2')
net = bottleneck(net, output_depth=128, filter_size=5, asymmetric=True, scope='bottleneck'+str(i)+'_3')
net = bottleneck(net, output_depth=128, filter_size=3, dilated=True, dilation_rate=4, scope='bottleneck'+str(i)+'_4')
net = bottleneck(net, output_depth=128, filter_size=3, scope='bottleneck'+str(i)+'_5')
net = bottleneck(net, output_depth=128, filter_size=3, dilated=True, dilation_rate=8, scope='bottleneck'+str(i)+'_6')
net = bottleneck(net, output_depth=128, filter_size=5, asymmetric=True, scope='bottleneck'+str(i)+'_7')
net = bottleneck(net, output_depth=128, filter_size=3, dilated=True, dilation_rate=16, scope='bottleneck'+str(i)+'_8')
with slim.arg_scope([bottleneck], regularizer_prob=0.1, decoder=True):
#===================STAGE FOUR========================
bottleneck_scope_name = "bottleneck" + str(i + 1)
#The decoder section, so start to upsample.
net = bottleneck(net, output_depth=64, filter_size=3, upsampling=True,
pooling_indices=pooling_indices_2, output_shape=inputs_shape_2, scope=bottleneck_scope_name+'_0')
#Perform skip connections here
if skip_connections:
net = tf.add(net, net_two, name=bottleneck_scope_name+'_skip_connection')
net = bottleneck(net, output_depth=64, filter_size=3, scope=bottleneck_scope_name+'_1')
net = bottleneck(net, output_depth=64, filter_size=3, scope=bottleneck_scope_name+'_2')
#===================STAGE FIVE========================
bottleneck_scope_name = "bottleneck" + str(i + 2)
net = bottleneck(net, output_depth=16, filter_size=3, upsampling=True,
pooling_indices=pooling_indices_1, output_shape=inputs_shape_1, scope=bottleneck_scope_name+'_0')
#perform skip connections here
if skip_connections:
net = tf.add(net, net_one, name=bottleneck_scope_name+'_skip_connection')
net = bottleneck(net, output_depth=16, filter_size=3, scope=bottleneck_scope_name+'_1')
#=============FINAL CONVOLUTION=============
logits = slim.conv2d_transpose(net, num_classes, [2,2], stride=2, scope='fullconv')
probabilities = tf.nn.softmax(logits, name='logits_to_softmax')
return logits, probabilities
def ENet_arg_scope(weight_decay=2e-4,
batch_norm_decay=0.1,
batch_norm_epsilon=0.001):
# Set weight_decay for weights in conv2d and separable_conv2d layers.
with slim.arg_scope([slim.conv2d],
weights_regularizer=slim.l2_regularizer(weight_decay),
biases_regularizer=slim.l2_regularizer(weight_decay)):
# Set parameters for batch_norm.
with slim.arg_scope([slim.batch_norm],
decay=batch_norm_decay,
epsilon=batch_norm_epsilon) as scope:
return scope
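# Usage (my own sketch, not part of the repository): building the graph before
# freezing and converting it. Input size and class count are placeholder values.
def _build_enet_for_inference():
    images = tf.placeholder(tf.float32, shape=[1, 512, 1024, 3], name='input')
    with slim.arg_scope(ENet_arg_scope()):
        logits, probabilities = ENet(images,
                                     num_classes=12,
                                     batch_size=1,
                                     is_training=False)
    return logits, probabilities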
I am closing this issue due to the emergence of high-performance models such as DeepLab v3 and DeepLab-slim.