# -*- coding: utf-8 -*-

import collections
import tensorflow as tf
from datetime import datetime
import math
import time

slim = tf.contrib.slim


class Block(collections.namedtuple('Block', ['scope', 'unit_fn', 'args'])):
    '''A named tuple describing a ResNet block.'''
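
# Example (used by resnet_v2_50 below):
#   Block('block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)])
# describes a block of three bottleneck units with output depth 256 and
# bottleneck depth 64, where only the last unit downsamples (stride 2).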

def subsample(inputs, factor, scope=None):
    '''Subsamples the input.
    factor: the subsampling factor; if it is 1 the inputs are returned
    unchanged, otherwise they are downsampled with slim.max_pool2d.'''
    if factor == 1:
        return inputs
    else:
        return slim.max_pool2d(inputs, [1, 1], stride=factor, scope=scope)


def conv2d_same(inputs, num_outputs, kernel_size, stride, scope=None):
    '''Strided 2-D convolution with 'SAME'-style output size.'''
    if stride == 1:
        # With stride 1, an ordinary slim.conv2d with padding='SAME' suffices.
        return slim.conv2d(inputs, num_outputs, kernel_size, stride=1,
                           padding='SAME', scope=scope)

    else:
        # Pad zeros explicitly: the total padding is kernel_size - 1, split into
        # pad_beg = pad_total // 2 and pad_end = the remainder.
        pad_total = kernel_size - 1
        pad_beg = pad_total // 2
        pad_end = pad_total - pad_beg
        # tf.pad zero-pads the inputs along the two spatial dimensions.
        inputs = tf.pad(inputs, [[0, 0], [pad_beg, pad_end],
                                 [pad_beg, pad_end], [0, 0]])

        return slim.conv2d(inputs, num_outputs, kernel_size, stride=stride,
                           padding='VALID', scope=scope)
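
# For example, with kernel_size=7 and stride=2 (the root 'conv1' below),
# pad_total = 6 and pad_beg = pad_end = 3, so a 224x224 input is padded to
# 230x230 before the VALID convolution, giving a 112x112 output.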

@slim.add_arg_scope
def stack_blocks_dense(net, blocks, outputs_collections=None):
    '''net: the input tensor.
    blocks: a list of Block namedtuples.
    outputs_collections: the collection used to gather the end_points.'''
    for block in blocks:
        # Two nested loops: iterate over the Blocks, and within each Block over
        # its residual units.
        with tf.variable_scope(block.scope, 'block', [net]) as sc:
            # The two variable scopes name each residual unit as block_x/unit_y.

            for i, unit in enumerate(block.args):
                with tf.variable_scope('unit_%d' % (i + 1), values=[net]):

                    # The inner loop unpacks the args defined in each Block
                    # into depth, depth_bottleneck and stride.
                    unit_depth, unit_depth_bottleneck, unit_stride = unit

                    # unit_fn (the residual unit generating function) creates
                    # and connects the residual units in order.
                    net = block.unit_fn(net,
                                        depth=unit_depth,
                                        depth_bottleneck=unit_depth_bottleneck,
                                        stride=unit_stride)

            # slim.utils.collect_named_outputs adds the block output net to the collection.
            net = slim.utils.collect_named_outputs(outputs_collections, sc.name, net)

    # Once all residual units are stacked, return net as the result of stack_blocks_dense.
    return net
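
# For example, Block('block2', bottleneck, [(512, 128, 1)] * 3 + [(512, 128, 2)])
# expands into the scopes block2/unit_1 ... block2/unit_4, where only unit_4
# downsamples (stride 2).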


def resnet_arg_scope(is_training=True,
                     weight_decay=0.0001,
                     batch_norm_decay=0.997,
                     batch_norm_epsilon=1e-5,
                     batch_norm_scale=True):
    '''Creates the arg_scope shared by the ResNets, i.e. the default parameter
    values for certain functions. weight_decay defaults to 0.0001.'''

    batch_norm_params = {
        'is_training': is_training,
        'decay': batch_norm_decay,      # BN decay rate, default 0.997
        'epsilon': batch_norm_epsilon,  # default 1e-5
        'scale': batch_norm_scale,      # BN scale, default True
        'updates_collections': tf.GraphKeys.UPDATE_OPS,
    }

    with slim.arg_scope(
            [slim.conv2d],
            weights_regularizer=slim.l2_regularizer(weight_decay),
            weights_initializer=slim.variance_scaling_initializer(),
            activation_fn=tf.nn.relu,
            normalizer_fn=slim.batch_norm,
            normalizer_params=batch_norm_params):

        with slim.arg_scope([slim.batch_norm], **batch_norm_params):
            with slim.arg_scope([slim.max_pool2d], padding='SAME') as arg_sc:

                return arg_sc
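
# Typical usage (see the benchmark at the bottom of this file):
#   with slim.arg_scope(resnet_arg_scope(is_training=False)):
#       net, end_points = resnet_v2_152(inputs, 1000)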

@slim.add_arg_scope
def bottleneck(inputs, depth, depth_bottleneck, stride,
               outputs_collections=None, scope=None):
    '''Bottleneck residual unit.
    inputs: the input tensor.
    depth, depth_bottleneck, stride: the args from the Block namedtuple.
    outputs_collections: the collection used to gather the end_points.
    scope: the name of the unit.'''
    with tf.variable_scope(scope, 'bottleneck_v2', [inputs]) as sc:

        # slim.utils.last_dimension returns the last dimension of the input,
        # i.e. the number of input channels; min_rank=4 requires at least 4 dims.
        depth_in = slim.utils.last_dimension(inputs.get_shape(), min_rank=4)

        # Batch-normalize the input, then apply ReLU: the ResNet V2 preactivation.
        preact = slim.batch_norm(inputs, activation_fn=tf.nn.relu,
                                 scope='preact')
        # Define the shortcut (the identity branch x).
        if depth == depth_in:
            # If the unit's input and output channel counts match, spatially
            # subsample the inputs by the stride.
            shortcut = subsample(inputs, stride, 'shortcut')

        else:
            # Otherwise use a strided 1x1 convolution to change the number of
            # channels so that the input and output depths agree.
            shortcut = slim.conv2d(preact, depth, [1, 1], stride=stride,
                                   normalizer_fn=None, activation_fn=None,
                                   scope='shortcut')
        # Define the residual branch:
        # step 1: 1x1 convolution, stride 1, depth_bottleneck output channels;
        # step 2: 3x3 convolution, the given stride, depth_bottleneck output channels;
        # step 3: 1x1 convolution, stride 1, depth output channels.
        residual = slim.conv2d(preact, depth_bottleneck, [1, 1], stride=1,
                               scope='conv1')

        residual = slim.conv2d(residual, depth_bottleneck, 3, stride,
                               scope='conv2')
        residual = slim.conv2d(residual, depth, [1, 1], stride=1,
                               normalizer_fn=None, activation_fn=None,
                               scope='conv3')

        output = shortcut + residual

        # slim.utils.collect_named_outputs adds the result to outputs_collections
        # and returns output as the function result.
        return slim.utils.collect_named_outputs(outputs_collections, sc.name, output)
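
# Shape example: with a (32, 56, 56, 256) input and depth=512,
# depth_bottleneck=128, stride=2, the shortcut is a strided 1x1 convolution to
# (32, 28, 28, 512); the residual branch goes 1x1 -> 3x3 (stride 2) -> 1x1 to
# the same shape, and the two branches are summed element-wise.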


def resnet_v2(inputs,
              blocks,
              num_classes=None,
              global_pool=True,
              include_root_block=True,
              reuse=None,
              scope=None):
    '''Main generator function for ResNet V2.
    inputs: the input tensor.
    blocks: a list of pre-defined Block namedtuples.
    num_classes: the number of output classes.
    global_pool: whether to add a final global average pooling layer.
    include_root_block: whether to add the initial 7x7 convolution and max
    pooling that ResNet usually starts with.
    reuse: whether to reuse the variables.
    scope: the name of the whole network.'''

    with tf.variable_scope(scope, 'resnet_v2', [inputs], reuse=reuse) as sc:
        end_points_collection = sc.original_name_scope + '_end_points'

        # slim.arg_scope sets the outputs_collections argument of slim.conv2d,
        # bottleneck and stack_blocks_dense to end_points_collection by default.
        with slim.arg_scope([slim.conv2d, bottleneck,
                             stack_blocks_dense],
                            outputs_collections=end_points_collection):

            net = inputs

            if include_root_block:

                with slim.arg_scope([slim.conv2d], activation_fn=None,
                                    normalizer_fn=None):
                    # When include_root_block is set, create the initial 7x7
                    # convolution with 64 output channels and stride 2.
                    net = conv2d_same(net, 64, 7, stride=2, scope='conv1')

                # 3x3 max pooling with stride 2; after two stride-2 layers the
                # spatial size has shrunk to 1/4 of the input.
                net = slim.max_pool2d(net, [3, 3], stride=2, scope='pool1')
            # stack_blocks_dense builds all the residual units.
            net = stack_blocks_dense(net, blocks)
            net = slim.batch_norm(net, activation_fn=tf.nn.relu, scope='postnorm')

            if global_pool:
                # Global average pooling; tf.reduce_mean is more efficient here
                # than an explicit avg_pool layer.
                net = tf.reduce_mean(net, [1, 2], name='pool5', keep_dims=True)

            if num_classes is not None:
                # If a class count is given, add a 1x1 convolution with
                # num_classes output channels.
                net = slim.conv2d(net, num_classes, [1, 1], activation_fn=None,
                                  normalizer_fn=None, scope='logits')

            # slim.utils.convert_collection_to_dict turns the collection into a dict.
            end_points = slim.utils.convert_collection_to_dict(end_points_collection)

            if num_classes is not None:
                # Add a softmax layer producing the network's predictions.
                end_points['predictions'] = slim.softmax(net, scope='predictions')

            return net, end_points


def resnet_v2_50(inputs,
                 num_classes=None,
                 global_pool=True,
                 reuse=None,
                 scope='resnet_v2_50'):
    '''50-layer ResNet.
    The four blocks have 3, 4, 6 and 3 units; total layers = (3+4+6+3)*3+2 = 50.
    The first three blocks each end with a stride-2 unit, so the spatial size
    becomes 224/(4*2*2*2) = 7 and the output depth grows to 2048.'''
    blocks = [
        Block('block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]),
        Block('block2', bottleneck, [(512, 128, 1)] * 3 + [(512, 128, 2)]),
        Block('block3', bottleneck, [(1024, 256, 1)] * 5 + [(1024, 256, 2)]),
        Block('block4', bottleneck, [(2048, 512, 1)] * 3)
    ]

    return resnet_v2(inputs, blocks, num_classes, global_pool,
                     include_root_block=True, reuse=reuse, scope=scope)


def resnet_v2_101(inputs,
                  num_classes=None,
                  global_pool=True,
                  reuse=None,
                  scope='resnet_v2_101'):
    '''101-layer ResNet.
    The four blocks have 3, 4, 23 and 3 units; total layers = (3+4+23+3)*3+2 = 101.
    The first three blocks each end with a stride-2 unit, so the spatial size
    becomes 224/(4*2*2*2) = 7 and the output depth grows to 2048.'''
    blocks = [
        Block('block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]),
        Block('block2', bottleneck, [(512, 128, 1)] * 3 + [(512, 128, 2)]),
        Block('block3', bottleneck, [(1024, 256, 1)] * 22 + [(1024, 256, 2)]),
        Block('block4', bottleneck, [(2048, 512, 1)] * 3)
    ]

    return resnet_v2(inputs, blocks, num_classes, global_pool,
                     include_root_block=True, reuse=reuse, scope=scope)


def resnet_v2_152(inputs,
                  num_classes=None,
                  global_pool=True,
                  reuse=None,
                  scope='resnet_v2_152'):
    '''152-layer ResNet.
    The four blocks have 3, 8, 36 and 3 units; total layers = (3+8+36+3)*3+2 = 152.
    The first three blocks each end with a stride-2 unit, so the spatial size
    becomes 224/(4*2*2*2) = 7 and the output depth grows to 2048.'''
    blocks = [
        Block('block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]),
        Block('block2', bottleneck, [(512, 128, 1)] * 7 + [(512, 128, 2)]),
        Block('block3', bottleneck, [(1024, 256, 1)] * 35 + [(1024, 256, 2)]),
        Block('block4', bottleneck, [(2048, 512, 1)] * 3)
    ]

    return resnet_v2(inputs, blocks, num_classes, global_pool,
                     include_root_block=True, reuse=reuse, scope=scope)


def resnet_v2_200(inputs,
                  num_classes=None,
                  global_pool=True,
                  reuse=None,
                  scope='resnet_v2_200'):
    '''200-layer ResNet.
    The four blocks have 3, 24, 36 and 3 units; total layers = (3+24+36+3)*3+2 = 200.
    The first three blocks each end with a stride-2 unit, so the spatial size
    becomes 224/(4*2*2*2) = 7 and the output depth grows to 2048.'''
    blocks = [
        Block('block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]),
        Block('block2', bottleneck, [(512, 128, 1)] * 23 + [(512, 128, 2)]),
        Block('block3', bottleneck, [(1024, 256, 1)] * 35 + [(1024, 256, 2)]),
        Block('block4', bottleneck, [(2048, 512, 1)] * 3)
    ]

    return resnet_v2(inputs, blocks, num_classes, global_pool,
                     include_root_block=True, reuse=reuse, scope=scope)


def time_tensorflow_run(session, target, info_string):
    '''Runs target num_batches times (after a warm-up period) and reports the
    mean and standard deviation of the per-batch duration.'''
    num_steps_burn_in = 10  # the first 10 iterations are discarded as warm-up
    total_duration = 0.0
    total_duration_squared = 0.0
    for i in range(num_batches + num_steps_burn_in):
        start_time = time.time()
        _ = session.run(target)
        duration = time.time() - start_time

        if i >= num_steps_burn_in:
            if not i % 10:
                print('%s: step %d, duration = %.3f' %
                      (datetime.now(), i - num_steps_burn_in, duration))
            total_duration += duration
            total_duration_squared += duration * duration

    # Mean and standard deviation, using Var(X) = E[X^2] - E[X]^2.
    mn = total_duration / num_batches
    vr = total_duration_squared / num_batches - mn * mn
    sd = math.sqrt(vr)

    print('%s: %s across %d steps, %.3f +/- %3.3f sec/batch' %
          (datetime.now(), info_string, num_batches, mn, sd))

# Benchmark: time the forward pass of ResNet V2-152 on random 224x224 inputs.
batch_size = 32
height, width = 224, 224
inputs = tf.random_uniform((batch_size, height, width, 3))
with slim.arg_scope(resnet_arg_scope(is_training=False)):
    net, end_points = resnet_v2_152(inputs, 1000)

init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)
num_batches = 100  # used as a module-level global inside time_tensorflow_run
time_tensorflow_run(sess, net, 'Forward')