可變形卷積的論文爲:Deformable Convolutional Networks
而以前google一篇論文對這篇論文有指導意義:Spatial Transformer Networks
Deformable Convolutional 的 github 代碼地址爲:https://github.com/felixlaumon/deform-conv
可變形卷積很好理解,Keras 中的接口爲 ConvOffset2D,但如何實現呢?實現方面需要關注兩個限制:
三、例:取一個座標值 (a,b),將其轉換爲四個整數 floor(a)、ceil(a)、floor(b)、ceil(b),將這四個整數進行組合,獲得四對座標 (floor(a),floor(b))、(floor(a),ceil(b))、(ceil(a),floor(b))、(ceil(a),ceil(b))。這四對座標中的每一個都對應 U 中的一個像素值,而咱們需要獲得 (a,b) 處的像素值,這裏採用雙線性插值的方式計算(一方面獲得的像素值準確,另一方面能夠進行反向傳播)。
def __init__(self, filters, init_normal_stddev=0.01, **kwargs):
    """Configure the convolution that predicts the sampling offsets.

    Parameters
    ----------
    filters : int
        Stored as ``self.filters``. The parent initializer is asked for
        ``filters * 2`` output channels, i.e. two offset values per
        channel; ``call`` pairs them with the input channels, so this is
        expected to equal the channel count of the input.
    init_normal_stddev : float
        Second argument passed to ``RandomNormal`` for the kernel
        initializer (the first argument is 0).
    **kwargs
        Forwarded unchanged to the parent initializer.
    """
    self.filters = filters
    super(ConvOffset2D, self).__init__(
        self.filters * 2, (3, 3), padding='same', use_bias=False,
        kernel_initializer=RandomNormal(0, init_normal_stddev),
        **kwargs
    )


def call(self, x):
    """Return the deformed feature map.

    Parameters
    ----------
    x : tensor, shape = (b, h, w, c)
        Batch size, height, width and number of channels.

    Returns
    -------
    tensor, shape = (b, h, w, c)
        ``x`` resampled at the positions given by the predicted offsets.
    """
    # Static shape of the input: (b, h, w, c).
    x_shape = x.get_shape()

    # The ordinary convolution of the parent class yields (b, h, w, 2c):
    # two offset components for every pixel of every channel.
    offsets = super(ConvOffset2D, self).call(x)

    # Treat every channel of every sample as an independent image.
    # offsets: (b*c, h, w, 2)
    offsets = self._to_bc_h_w_2(offsets, x_shape)

    # Bring the input into the matching layout.
    # x: (b*c, h, w)
    x = self._to_bc_h_w(x, x_shape)

    # Sample the input at "grid position + offset" (see
    # tf_batch_map_offsets) to build the deformed images.
    # x_offset: (b*c, h, w)
    x_offset = tf_batch_map_offsets(x, offsets)

    # Back to the channels-last layout of the input.
    # x_offset: (b, h, w, c)
    x_offset = self._to_b_h_w_c(x_offset, x_shape)
    return x_offset


def compute_output_shape(self, input_shape):
    """Output shape is the same as input shape.

    This layer performs only the deformation (resampling) step, so the
    shape is passed through untouched.
    """
    return input_shape


@staticmethod
def _to_bc_h_w_2(x, x_shape):
    """(b, h, w, 2c) -> (b*c, h, w, 2)

    ``x_shape`` is the static shape of the layer input; its entries 1 and
    2 (h and w) must be known because they are converted with ``int``.
    """
    # NOTE(review): after the transpose the data is laid out as
    # (b, 2c, h, w); the reshape regroups those values in memory order
    # rather than moving channel pairs onto the last axis. Confirm that
    # this layout is intended.
    x = tf.transpose(x, [0, 3, 1, 2])
    x = tf.reshape(x, (-1, int(x_shape[1]), int(x_shape[2]), 2))
    return x


@staticmethod
def _to_bc_h_w(x, x_shape):
    """(b, h, w, c) -> (b*c, h, w)"""
    # Channels first, then merge batch and channel axes.
    x = tf.transpose(x, [0, 3, 1, 2])
    x = tf.reshape(x, (-1, int(x_shape[1]), int(x_shape[2])))
    return x


@staticmethod
def _to_b_h_w_c(x, x_shape):
    """(b*c, h, w) -> (b, h, w, c)"""
    # Split the merged axis back into (b, c), then move channels last.
    x = tf.reshape(
        x, (-1, int(x_shape[3]), int(x_shape[1]), int(x_shape[2]))
    )
    x = tf.transpose(x, [0, 2, 3, 1])
    return x
def tf_flatten(a):
    """Flatten tensor"""
    return tf.reshape(a, [-1])


def tf_repeat(a, repeats, axis=0):
    """TensorFlow version of np.repeat for 1D

    Every element of ``a`` is repeated ``repeats`` times in place, e.g.
    ``[0, 1]`` with ``repeats=2`` gives ``[0, 0, 1, 1]``. ``axis`` is
    accepted for signature compatibility but is not used.
    """
    # https://github.com/tensorflow/tensorflow/issues/8521
    assert len(a.get_shape()) == 1

    a = tf.expand_dims(a, -1)
    a = tf.tile(a, [1, repeats])
    a = tf_flatten(a)
    return a


def tf_repeat_2d(a, repeats):
    """Tensorflow version of np.repeat for 2D

    Stacks ``repeats`` copies of the 2D tensor ``a`` along a new leading
    axis, giving shape ``(repeats,) + a.shape``.
    """
    assert len(a.get_shape()) == 2

    a = tf.expand_dims(a, 0)
    a = tf.tile(a, [repeats, 1, 1])
    return a


def tf_map_coordinates(input, coords, order=1):
    """Tensorflow version of scipy.ndimage.map_coordinates

    Note that coords is transposed and only 2D is supported. Coordinates
    are not clipped here; the caller must keep them inside the input.

    Parameters
    ----------
    input : tf.Tensor. shape = (s, s)
    coords : tf.Tensor. shape = (n_points, 2)
    order : int. Only 1 (bilinear interpolation) is supported.

    Returns
    -------
    tf.Tensor. shape = (n_points,)
    """
    assert order == 1

    # The four integer neighbours of every fractional coordinate.
    coords_lt = tf.cast(tf.floor(coords), 'int32')
    coords_rb = tf.cast(tf.ceil(coords), 'int32')
    coords_lb = tf.stack([coords_lt[:, 0], coords_rb[:, 1]], axis=1)
    coords_rt = tf.stack([coords_rb[:, 0], coords_lt[:, 1]], axis=1)

    vals_lt = tf.gather_nd(input, coords_lt)
    vals_rb = tf.gather_nd(input, coords_rb)
    vals_lb = tf.gather_nd(input, coords_lb)
    vals_rt = tf.gather_nd(input, coords_rt)

    # Bilinear interpolation: first along axis 0, then along axis 1.
    coords_offset_lt = coords - tf.cast(coords_lt, 'float32')
    vals_t = vals_lt + (vals_rt - vals_lt) * coords_offset_lt[:, 0]
    vals_b = vals_lb + (vals_rb - vals_lb) * coords_offset_lt[:, 0]
    mapped_vals = vals_t + (vals_b - vals_t) * coords_offset_lt[:, 1]
    return mapped_vals


def sp_batch_map_coordinates(inputs, coords):
    """Reference implementation for batch_map_coordinates

    Parameters
    ----------
    inputs : np.ndarray. shape = (b, h, w)
    coords : np.ndarray. shape = (b, n_points, 2), (row, col) pairs

    Returns
    -------
    np.ndarray. shape = (b, n_points)
    """
    # Clip rows to [0, h - 1] and columns to [0, w - 1]; for square
    # inputs this equals clipping everything to [0, s - 1].
    height, width = inputs.shape[1], inputs.shape[2]
    coords = np.clip(coords, 0, [height - 1, width - 1])
    mapped_vals = np.array([
        sp_map_coordinates(image, coord.T, mode='nearest', order=1)
        for image, coord in zip(inputs, coords)
    ])
    return mapped_vals


def tf_batch_map_coordinates(input, coords, order=1):
    """Batch version of tf_map_coordinates

    Only supports 2D feature maps. ``order`` is accepted for signature
    compatibility but is not used; interpolation is always bilinear.

    Parameters
    ----------
    input : tf.Tensor. shape = (b, h, w)
    coords : tf.Tensor. shape = (b, n_points, 2), (row, col) pairs

    Returns
    -------
    tf.Tensor. shape = (b, n_points)
    """
    input_shape = tf.shape(input)
    batch_size = input_shape[0]
    n_coords = tf.shape(coords)[1]

    # Keep every coordinate inside the image. The upper bound is taken
    # per axis, (h - 1, w - 1), and broadcasts over the last axis of
    # coords, so non-square maps are handled as well.
    max_index = tf.cast(input_shape[1:3] - 1, 'float32')
    coords = tf.maximum(tf.minimum(coords, max_index), 0.)

    # Integer coordinates of the left-top neighbour.
    coords_lt = tf.cast(tf.floor(coords), 'int32')
    # Integer coordinates of the right-bottom neighbour.
    coords_rb = tf.cast(tf.ceil(coords), 'int32')
    # Integer coordinates of the left-bottom neighbour.
    coords_lb = tf.stack([coords_lt[..., 0], coords_rb[..., 1]], axis=-1)
    # Integer coordinates of the right-top neighbour.
    coords_rt = tf.stack([coords_rb[..., 0], coords_lt[..., 1]], axis=-1)

    # Image index of every point, flattened to length b * n_points,
    # e.g. (0, 0, 0, 0, 1, 1, 1, 1, ...) for n_points = 4.
    idx = tf_repeat(tf.range(batch_size), n_coords)

    def _get_vals_by_coords(input, coords):
        # After stacking, every row is one (image, row, col) index triple.
        indices = tf.stack([
            idx, tf_flatten(coords[..., 0]), tf_flatten(coords[..., 1])
        ], axis=-1)
        vals = tf.gather_nd(input, indices)
        vals = tf.reshape(vals, (batch_size, n_coords))
        return vals

    # Pixel values at the four neighbouring integer positions.
    vals_lt = _get_vals_by_coords(input, coords_lt)
    vals_rb = _get_vals_by_coords(input, coords_rb)
    vals_lb = _get_vals_by_coords(input, coords_lb)
    vals_rt = _get_vals_by_coords(input, coords_rt)

    # Bilinear interpolation of the four values.
    coords_offset_lt = coords - tf.cast(coords_lt, 'float32')
    vals_t = vals_lt + (vals_rt - vals_lt) * coords_offset_lt[..., 0]
    vals_b = vals_lb + (vals_rb - vals_lb) * coords_offset_lt[..., 0]
    mapped_vals = vals_t + (vals_b - vals_t) * coords_offset_lt[..., 1]
    return mapped_vals


def sp_batch_map_offsets(input, offsets):
    """Reference implementation for tf_batch_map_offsets

    Parameters
    ----------
    input : np.ndarray. shape = (b, h, w)
    offsets : np.ndarray. shape = (b, h, w, 2)

    Returns
    -------
    np.ndarray. shape = (b, h * w)
    """
    batch_size = input.shape[0]
    height, width = input.shape[1], input.shape[2]

    # One (row, col) pair per pixel; after the repeat the grid has
    # shape (b, h * w, 2).
    offsets = offsets.reshape(batch_size, -1, 2)
    grid = np.stack(np.mgrid[:height, :width], -1).reshape(-1, 2)
    grid = np.repeat([grid], batch_size, axis=0)

    # Target coordinates = pixel position + offset, shape (b, h * w, 2).
    coords = offsets + grid
    # Target coordinates must stay inside the image.
    coords = np.clip(coords, 0, [height - 1, width - 1])

    # Sample the input at the target coordinates.
    mapped_vals = sp_batch_map_coordinates(input, coords)
    return mapped_vals


def tf_batch_map_offsets(input, offsets, order=1):
    """Batch map offsets into input

    ``order`` is accepted for signature compatibility but is not used.

    Parameters
    ---------
    input : tf.Tensor. shape = (b, h, w)
    offsets: tf.Tensor. shape = (b, h, w, 2)

    Returns
    -------
    tf.Tensor. shape = (b, h * w)
    """
    input_shape = tf.shape(input)
    batch_size = input_shape[0]
    height = input_shape[1]
    width = input_shape[2]

    offsets = tf.reshape(offsets, (batch_size, -1, 2))

    # (row, col) position of every pixel, flattened to (h * w, 2) and
    # copied once per image in the batch.
    grid = tf.meshgrid(
        tf.range(height), tf.range(width), indexing='ij'
    )
    grid = tf.stack(grid, axis=-1)
    grid = tf.cast(grid, 'float32')
    grid = tf.reshape(grid, (-1, 2))
    grid = tf_repeat_2d(grid, batch_size)

    # Target coordinates = pixel position + predicted offset.
    coords = offsets + grid

    mapped_vals = tf_batch_map_coordinates(input, coords)
    return mapped_vals