MobileNet V1 has a fairly simple structure; its main problem lies in the depthwise convolution. Depthwise convolution does reduce the amount of computation, but the kernels in the depthwise part easily "die" during training, ending up with outputs that are all zero after passing through ReLU.
The main architecture combines MobileNet V1 with the residual unit of ResNet, replacing the bottleneck of the residual unit with depthwise convolutions. Most importantly, the ordering is the opposite of the usual residual block: a standard residual block first uses a 1×1 convolution to reduce the number of feature-map channels, then applies a 3×3 convolution, and finally expands the channel count back with another 1×1 convolution, whereas the inverted residual block first expands the channels with a 1×1 convolution, then applies a 3×3 depthwise convolution, and finally projects back down with a 1×1 convolution. In addition, to prevent ReLU from destroying features, the ReLU non-linearity after the layers with few channels is replaced with a linear layer. A small sketch of the two orderings is given below.
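To make the ordering concrete, here is a rough channel-count trace through the two block types. The 64-channel input, the reduction factor of 4, and the expansion factor of 6 are made-up numbers used only for illustration:

# Hypothetical channel-count trace; 64 input channels, reduction factor 4
# and expansion factor 6 are illustrative values, not taken from the paper.
resnet_bottleneck = [
    ("1x1 conv (reduce)", 64 // 4),   # 64 -> 16, followed by ReLU
    ("3x3 conv",          64 // 4),   # 16 -> 16, followed by ReLU
    ("1x1 conv (expand)", 64),        # 16 -> 64, followed by ReLU
]
inverted_residual = [
    ("1x1 conv (expand)",  64 * 6),   # 64 -> 384, followed by ReLU6
    ("3x3 depthwise conv", 64 * 6),   # 384 -> 384, followed by ReLU6
    ("1x1 conv (project)", 64),       # 384 -> 64, linear, no ReLU
]
for name, layers in [("ResNet bottleneck", resnet_bottleneck),
                     ("Inverted residual", inverted_residual)]:
    print(name)
    for op, out_channels in layers:
        print("    {:<20s} -> {:3d} channels".format(op, out_channels))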
This section conveys a single idea: ReLU causes considerable information loss on tensors with a small number of channels. As the figure below shows, if the original input is first expanded to 15 or more dimensions and then passed through ReLU, hardly any information is lost; but if the original input is only expanded to 2-5 dimensions before applying ReLU, the information loss is quite severe.
Therefore, the convolution layer that performs the dimensionality reduction is not followed by a non-linear activation layer such as ReLU.
As for how ReLU loses features: ReLU outputs 0 for any negative input, and dimensionality reduction is itself a form of feature compression, so applying ReLU after it makes the feature loss even more severe.
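The effect can be reproduced with a small numerical experiment in the spirit of the one in the paper: embed 2-D points into n dimensions with a random matrix, apply ReLU, and project back with the pseudo-inverse. As n grows, the reconstruction error generally shrinks. The sketch below uses NumPy with arbitrary sizes; it is not the authors' exact setup:

import numpy as np

rng = np.random.default_rng(0)
x = rng.standard_normal((2, 1000))      # 1000 random 2-D points

for n in (2, 3, 5, 15, 30):
    T = rng.standard_normal((n, 2))     # random embedding into n dimensions
    y = np.maximum(T @ x, 0.0)          # ReLU in the n-dimensional space
    x_rec = np.linalg.pinv(T) @ y       # project back to 2-D
    mse = np.mean((x - x_rec) ** 2)
    print("n = {:2d}, reconstruction MSE = {:.4f}".format(n, mse))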
First, why invert the residual?
Because MobileNet V2 replaces the bottleneck of the residual block with a depthwise separable convolution, which has fewer parameters and extracts comparatively fewer features; if the channels were also compressed at this point, even fewer features could be extracted. The block therefore expands the channels first and only projects them back down at the end, as the parameter count below illustrates.
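A quick parameter count backs this up: a depthwise 3×3 convolution uses one filter per channel, so even after expanding the channels six-fold it remains far cheaper than a regular 3×3 convolution on the original channel count. The 64 channels and the expansion factor of 6 below are illustrative values:

def std_conv3x3_params(c_in, c_out):
    # full 3x3 convolution: 3*3*c_in weights per output channel
    return 3 * 3 * c_in * c_out

def depthwise3x3_params(channels):
    # depthwise 3x3 convolution: one 3x3 filter per channel
    return 3 * 3 * channels

c_in, expansion = 64, 6
print("3x3 conv, 64 -> 64 channels  :", std_conv3x3_params(c_in, c_in))         # 36864
print("3x3 depthwise, 64 channels   :", depthwise3x3_params(c_in))              # 576
print("3x3 depthwise, 64*6 channels :", depthwise3x3_params(c_in * expansion))  # 3456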
Comparison with MobileNet V1 and ShuffleNet:
# MobileNet V2 network definition (TensorFlow 1.x style, tf.layers / tf.contrib)
import tensorflow as tf

# `const` is a project-level configuration module (defined elsewhere) that
# provides the `use_batch_norm` flag checked below.


def mobilenet_v2_func_blocks(is_training):
    assert const.use_batch_norm == True

    filter_initializer = tf.contrib.layers.xavier_initializer()
    activation_func = tf.nn.relu6

    def conv2d(inputs, filters, kernel_size, stride, scope=''):
        # standard convolution + batch norm + ReLU
        with tf.variable_scope(scope):
            with tf.variable_scope('conv2d'):
                outputs = tf.layers.conv2d(inputs, filters, kernel_size,
                                           strides=(stride, stride), padding='same',
                                           activation=None, use_bias=False,
                                           kernel_initializer=filter_initializer)
                outputs = tf.layers.batch_normalization(outputs, training=is_training)
                outputs = tf.nn.relu(outputs)
        return outputs

    def _1x1_conv2d(inputs, filters, stride):
        # 1x1 convolution + batch norm, no activation; used to match channel
        # counts on the shortcut path
        kernel_size = [1, 1]
        with tf.variable_scope('1x1_conv2d'):
            outputs = tf.layers.conv2d(inputs, filters, kernel_size,
                                       strides=(stride, stride), padding='same',
                                       activation=None, use_bias=False,
                                       kernel_initializer=filter_initializer)
            outputs = tf.layers.batch_normalization(outputs, training=is_training)
        return outputs

    def expansion_conv2d(inputs, expansion, stride):
        # 1x1 convolution that expands the channel count by `expansion`,
        # followed by batch norm and ReLU6
        input_shape = inputs.get_shape().as_list()
        assert len(input_shape) == 4
        filters = input_shape[3] * expansion

        kernel_size = [1, 1]
        with tf.variable_scope('expansion_1x1_conv2d'):
            outputs = tf.layers.conv2d(inputs, filters, kernel_size,
                                       strides=(stride, stride), padding='same',
                                       activation=None, use_bias=False,
                                       kernel_initializer=filter_initializer)
            outputs = tf.layers.batch_normalization(outputs, training=is_training)
            outputs = activation_func(outputs)
        return outputs

    def projection_conv2d(inputs, filters, stride):
        # linear bottleneck: 1x1 projection + batch norm, deliberately no ReLU
        kernel_size = [1, 1]
        with tf.variable_scope('projection_1x1_conv2d'):
            outputs = tf.layers.conv2d(inputs, filters, kernel_size,
                                       strides=(stride, stride), padding='same',
                                       activation=None, use_bias=False,
                                       kernel_initializer=filter_initializer)
            outputs = tf.layers.batch_normalization(outputs, training=is_training)
        return outputs

    def depthwise_conv2d(inputs, depthwise_conv_kernel_size, stride):
        # depthwise convolution (num_outputs=None keeps it depthwise-only)
        with tf.variable_scope('depthwise_conv2d'):
            outputs = tf.contrib.layers.separable_conv2d(
                inputs, None, depthwise_conv_kernel_size,
                depth_multiplier=1,
                stride=(stride, stride),
                padding='SAME',
                activation_fn=None,
                weights_initializer=filter_initializer,
                biases_initializer=None)
            outputs = tf.layers.batch_normalization(outputs, training=is_training)
            outputs = tf.nn.relu(outputs)
        return outputs

    def avg_pool2d(inputs, scope=''):
        # global average pooling over the full spatial extent
        inputs_shape = inputs.get_shape().as_list()
        assert len(inputs_shape) == 4

        pool_height = inputs_shape[1]
        pool_width = inputs_shape[2]
        with tf.variable_scope(scope):
            outputs = tf.layers.average_pooling2d(inputs,
                                                  [pool_height, pool_width],
                                                  strides=(1, 1), padding='valid')
        return outputs

    def inverted_residual_block(inputs, filters, stride, expansion=6, scope=''):
        # expansion -> depthwise -> linear projection, with a shortcut when stride == 1
        assert stride == 1 or stride == 2

        depthwise_conv_kernel_size = [3, 3]
        pointwise_conv_filters = filters

        with tf.variable_scope(scope):
            net = inputs
            net = expansion_conv2d(net, expansion, stride=1)
            net = depthwise_conv2d(net, depthwise_conv_kernel_size, stride=stride)
            net = projection_conv2d(net, pointwise_conv_filters, stride=1)

            if stride == 1:
                # If net and inputs have different channel counts, use a 1x1
                # convolution to make them equal, then add the shortcut.
                if net.get_shape().as_list()[3] != inputs.get_shape().as_list()[3]:
                    inputs = _1x1_conv2d(inputs, net.get_shape().as_list()[3], stride=1)
                net = net + inputs
                return net
            else:
                # stride == 2: no shortcut
                return net

    func_blocks = {}
    func_blocks['conv2d'] = conv2d
    func_blocks['inverted_residual_block'] = inverted_residual_block
    func_blocks['avg_pool2d'] = avg_pool2d
    func_blocks['filter_initializer'] = filter_initializer
    func_blocks['activation_func'] = activation_func

    return func_blocks


def mobilenet_v2(inputs, is_training):
    assert const.use_batch_norm == True

    func_blocks = mobilenet_v2_func_blocks(is_training)
    _conv2d = func_blocks['conv2d']
    _inverted_residual_block = func_blocks['inverted_residual_block']
    _avg_pool2d = func_blocks['avg_pool2d']

    with tf.variable_scope('mobilenet_v2', 'mobilenet_v2', [inputs]):
        end_points = {}
        net = inputs

        net = _conv2d(net, 32, [3, 3], stride=2, scope='block0_0')  # size/2
        end_points['block0'] = net
        print('!! debug block0, net shape is: {}'.format(net.get_shape()))

        net = _inverted_residual_block(net, 16, stride=1, expansion=1, scope='block1_0')
        end_points['block1'] = net
        print('!! debug block1, net shape is: {}'.format(net.get_shape()))

        net = _inverted_residual_block(net, 24, stride=2, scope='block2_0')  # size/4
        net = _inverted_residual_block(net, 24, stride=1, scope='block2_1')
        end_points['block2'] = net
        print('!! debug block2, net shape is: {}'.format(net.get_shape()))

        net = _inverted_residual_block(net, 32, stride=2, scope='block3_0')  # size/8
        net = _inverted_residual_block(net, 32, stride=1, scope='block3_1')
        net = _inverted_residual_block(net, 32, stride=1, scope='block3_2')
        end_points['block3'] = net
        print('!! debug block3, net shape is: {}'.format(net.get_shape()))

        net = _inverted_residual_block(net, 64, stride=2, scope='block4_0')  # size/16
        net = _inverted_residual_block(net, 64, stride=1, scope='block4_1')
        net = _inverted_residual_block(net, 64, stride=1, scope='block4_2')
        net = _inverted_residual_block(net, 64, stride=1, scope='block4_3')
        end_points['block4'] = net
        print('!! debug block4, net shape is: {}'.format(net.get_shape()))

        net = _inverted_residual_block(net, 96, stride=1, scope='block5_0')
        net = _inverted_residual_block(net, 96, stride=1, scope='block5_1')
        net = _inverted_residual_block(net, 96, stride=1, scope='block5_2')
        end_points['block5'] = net
        print('!! debug block5, net shape is: {}'.format(net.get_shape()))

        net = _inverted_residual_block(net, 160, stride=2, scope='block6_0')  # size/32
        net = _inverted_residual_block(net, 160, stride=1, scope='block6_1')
        net = _inverted_residual_block(net, 160, stride=1, scope='block6_2')
        end_points['block6'] = net
        print('!! debug block6, net shape is: {}'.format(net.get_shape()))

        net = _inverted_residual_block(net, 320, stride=1, scope='block7_0')
        end_points['block7'] = net
        print('!! debug block7, net shape is: {}'.format(net.get_shape()))

        net = _conv2d(net, 1280, [1, 1], stride=1, scope='block8_0')
        end_points['block8'] = net
        print('!! debug block8, net shape is: {}'.format(net.get_shape()))

        output = _avg_pool2d(net, scope='output')
        print('!! debug after avg_pool, net shape is: {}'.format(output.get_shape()))

    return output, end_points
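A minimal usage sketch, assuming TensorFlow 1.x (for tf.layers and tf.contrib) and a const module that sets use_batch_norm = True; the 224×224×3 input size is the common ImageNet setting, not something required by the code above:

import numpy as np
import tensorflow as tf

inputs = tf.placeholder(tf.float32, [None, 224, 224, 3], name='inputs')
output, end_points = mobilenet_v2(inputs, is_training=False)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    feat = sess.run(output, feed_dict={inputs: np.zeros((1, 224, 224, 3), np.float32)})
    print(feat.shape)  # expected: (1, 1, 1, 1280) after the global average pool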