在已經訓練好的模型中,對指定層進行參數的微調,使之適應新的問題。node
首先須要將數據轉換成tfrecord的形式。在data_prepare文件夾下,運行:python
python data_convert.py -t pic/ \ --train-shards 2 \ --validation-shards 2 \ --num-threads 2 \ --dataset-name satellite
python data_convert.py -t pic/ --train-shards 2 --validation-shards 2 --num-threads 2 --dataset-name satellite
tfrecord.pygit
# coding:utf-8 # Copyright 2016 Google Inc. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """Converts image data to TFRecords file format with Example protos. The image data set is expected to reside in JPEG files located in the following directory structure. data_dir/label_0/image0.jpeg data_dir/label_0/image1.jpg ... data_dir/label_1/weird-image.jpeg data_dir/label_1/my-image.jpeg ... where the sub-directory is the unique label associated with these images. This TensorFlow script converts the training and evaluation data into a sharded data set consisting of TFRecord files train_directory/train-00000-of-01024 train_directory/train-00001-of-01024 ... train_directory/train-00127-of-01024 and validation_directory/validation-00000-of-00128 validation_directory/validation-00001-of-00128 ... validation_directory/validation-00127-of-00128 where we have selected 1024 and 128 shards for each data set. Each record within the TFRecord file is a serialized Example proto. The Example proto contains the following fields: image/encoded: string containing JPEG encoded image in RGB colorspace image/height: integer, image height in pixels image/width: integer, image width in pixels image/colorspace: string, specifying the colorspace, always 'RGB' image/channels: integer, specifying the number of channels, always 3 image/format: string, specifying the format, always'JPEG' image/filename: string containing the basename of the image file e.g. 'n01440764_10026.JPEG' or 'ILSVRC2012_val_00000293.JPEG' image/class/label: integer specifying the index in a classification layer. start from "class_label_base" image/class/text: string specifying the human-readable version of the label e.g. 'dog' If you data set involves bounding boxes, please look at build_imagenet_data.py. """ from __future__ import absolute_import from __future__ import division from __future__ import print_function from datetime import datetime import os import random import sys import threading import numpy as np import tensorflow as tf import logging def _int64_feature(value): """Wrapper for inserting int64 features into Example proto.""" if not isinstance(value, list): value = [value] return tf.train.Feature(int64_list=tf.train.Int64List(value=value)) def _bytes_feature(value): """Wrapper for inserting bytes features into Example proto.""" value = tf.compat.as_bytes(value) return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value])) def _convert_to_example(filename, image_buffer, label, text, height, width): """Build an Example proto for an example. Args: filename: string, path to an image file, e.g., '/path/to/example.JPG' image_buffer: string, JPEG encoding of RGB image label: integer, identifier for the ground truth for the network text: string, unique human-readable, e.g. 'dog' height: integer, image height in pixels width: integer, image width in pixels Returns: Example proto """ colorspace = 'RGB' channels = 3 image_format = 'JPEG' example = tf.train.Example(features=tf.train.Features(feature={ 'image/height': _int64_feature(height), 'image/width': _int64_feature(width), 'image/colorspace': _bytes_feature(colorspace), 'image/channels': _int64_feature(channels), 'image/class/label': _int64_feature(label), 'image/class/text': _bytes_feature(text), 'image/format': _bytes_feature(image_format), 'image/filename': _bytes_feature(os.path.basename(filename)), 'image/encoded': _bytes_feature(image_buffer)})) return example class ImageCoder(object): """Helper class that provides TensorFlow image coding utilities.""" def __init__(self): # Create a single Session to run all image coding calls. self._sess = tf.Session() # Initializes function that converts PNG to JPEG data. self._png_data = tf.placeholder(dtype=tf.string) image = tf.image.decode_png(self._png_data, channels=3) self._png_to_jpeg = tf.image.encode_jpeg(image, format='rgb', quality=100) # Initializes function that decodes RGB JPEG data. self._decode_jpeg_data = tf.placeholder(dtype=tf.string) self._decode_jpeg = tf.image.decode_jpeg(self._decode_jpeg_data, channels=3) def png_to_jpeg(self, image_data): return self._sess.run(self._png_to_jpeg, feed_dict={self._png_data: image_data}) def decode_jpeg(self, image_data): image = self._sess.run(self._decode_jpeg, feed_dict={self._decode_jpeg_data: image_data}) assert len(image.shape) == 3 assert image.shape[2] == 3 return image def _is_png(filename): """Determine if a file contains a PNG format image. Args: filename: string, path of the image file. Returns: boolean indicating if the image is a PNG. """ return '.png' in filename def _process_image(filename, coder): """Process a single image file. Args: filename: string, path to an image file e.g., '/path/to/example.JPG'. coder: instance of ImageCoder to provide TensorFlow image coding utils. Returns: image_buffer: string, JPEG encoding of RGB image. height: integer, image height in pixels. width: integer, image width in pixels. """ # Read the image file. with open(filename, 'rb') as f: image_data = f.read() # Convert any PNG to JPEG's for consistency. if _is_png(filename): logging.info('Converting PNG to JPEG for %s' % filename) image_data = coder.png_to_jpeg(image_data) # Decode the RGB JPEG. image = coder.decode_jpeg(image_data) # Check that image converted to RGB assert len(image.shape) == 3 height = image.shape[0] width = image.shape[1] assert image.shape[2] == 3 return image_data, height, width def _process_image_files_batch(coder, thread_index, ranges, name, filenames, texts, labels, num_shards, command_args): """Processes and saves list of images as TFRecord in 1 thread. Args: coder: instance of ImageCoder to provide TensorFlow image coding utils. thread_index: integer, unique batch to run index is within [0, len(ranges)). ranges: list of pairs of integers specifying ranges of each batches to analyze in parallel. name: string, unique identifier specifying the data set filenames: list of strings; each string is a path to an image file texts: list of strings; each string is human readable, e.g. 'dog' labels: list of integer; each integer identifies the ground truth num_shards: integer number of shards for this data set. """ # Each thread produces N shards where N = int(num_shards / num_threads). # For instance, if num_shards = 128, and the num_threads = 2, then the first # thread would produce shards [0, 64). num_threads = len(ranges) assert not num_shards % num_threads num_shards_per_batch = int(num_shards / num_threads) shard_ranges = np.linspace(ranges[thread_index][0], ranges[thread_index][1], num_shards_per_batch + 1).astype(int) num_files_in_thread = ranges[thread_index][1] - ranges[thread_index][0] counter = 0 for s in range(num_shards_per_batch): # Generate a sharded version of the file name, e.g. 'train-00002-of-00010' shard = thread_index * num_shards_per_batch + s output_filename = '%s_%s_%.5d-of-%.5d.tfrecord' % (command_args.dataset_name, name, shard, num_shards) output_file = os.path.join(command_args.output_directory, output_filename) writer = tf.python_io.TFRecordWriter(output_file) shard_counter = 0 files_in_shard = np.arange(shard_ranges[s], shard_ranges[s + 1], dtype=int) for i in files_in_shard: filename = filenames[i] label = labels[i] text = texts[i] image_buffer, height, width = _process_image(filename, coder) example = _convert_to_example(filename, image_buffer, label, text, height, width) writer.write(example.SerializeToString()) shard_counter += 1 counter += 1 if not counter % 1000: logging.info('%s [thread %d]: Processed %d of %d images in thread batch.' % (datetime.now(), thread_index, counter, num_files_in_thread)) sys.stdout.flush() writer.close() logging.info('%s [thread %d]: Wrote %d images to %s' % (datetime.now(), thread_index, shard_counter, output_file)) sys.stdout.flush() shard_counter = 0 logging.info('%s [thread %d]: Wrote %d images to %d shards.' % (datetime.now(), thread_index, counter, num_files_in_thread)) sys.stdout.flush() def _process_image_files(name, filenames, texts, labels, num_shards, command_args): """Process and save list of images as TFRecord of Example protos. Args: name: string, unique identifier specifying the data set filenames: list of strings; each string is a path to an image file texts: list of strings; each string is human readable, e.g. 'dog' labels: list of integer; each integer identifies the ground truth num_shards: integer number of shards for this data set. """ assert len(filenames) == len(texts) assert len(filenames) == len(labels) # Break all images into batches with a [ranges[i][0], ranges[i][1]]. spacing = np.linspace(0, len(filenames), command_args.num_threads + 1).astype(np.int) ranges = [] for i in range(len(spacing) - 1): ranges.append([spacing[i], spacing[i + 1]]) # Launch a thread for each batch. logging.info('Launching %d threads for spacings: %s' % (command_args.num_threads, ranges)) sys.stdout.flush() # Create a mechanism for monitoring when all threads are finished. coord = tf.train.Coordinator() # Create a generic TensorFlow-based utility for converting all image codings. coder = ImageCoder() threads = [] for thread_index in range(len(ranges)): args = (coder, thread_index, ranges, name, filenames, texts, labels, num_shards, command_args) t = threading.Thread(target=_process_image_files_batch, args=args) t.start() threads.append(t) # Wait for all the threads to terminate. coord.join(threads) logging.info('%s: Finished writing all %d images in data set.' % (datetime.now(), len(filenames))) sys.stdout.flush() def _find_image_files(data_dir, labels_file, command_args): """Build a list of all images files and labels in the data set. Args: data_dir: string, path to the root directory of images. Assumes that the image data set resides in JPEG files located in the following directory structure. data_dir/dog/another-image.JPEG data_dir/dog/my-image.jpg where 'dog' is the label associated with these images. labels_file: string, path to the labels file. The list of valid labels are held in this file. Assumes that the file contains entries as such: dog cat flower where each line corresponds to a label. We map each label contained in the file to an integer starting with the integer 0 corresponding to the label contained in the first line. Returns: filenames: list of strings; each string is a path to an image file. texts: list of strings; each string is the class, e.g. 'dog' labels: list of integer; each integer identifies the ground truth. """ logging.info('Determining list of input files and labels from %s.' % data_dir) unique_labels = [l.strip() for l in tf.gfile.FastGFile( labels_file, 'r').readlines()] labels = [] filenames = [] texts = [] # Leave label index 0 empty as a background class. """很是重要,這裏咱們調整label從0開始以符合定義""" label_index = command_args.class_label_base # Construct the list of JPEG files and labels. for text in unique_labels: jpeg_file_path = '%s/%s/*' % (data_dir, text) matching_files = tf.gfile.Glob(jpeg_file_path) labels.extend([label_index] * len(matching_files)) texts.extend([text] * len(matching_files)) filenames.extend(matching_files) if not label_index % 100: logging.info('Finished finding files in %d of %d classes.' % ( label_index, len(labels))) label_index += 1 # Shuffle the ordering of all image files in order to guarantee # random ordering of the images with respect to label in the # saved TFRecord files. Make the randomization repeatable. shuffled_index = list(range(len(filenames))) random.seed(12345) random.shuffle(shuffled_index) filenames = [filenames[i] for i in shuffled_index] texts = [texts[i] for i in shuffled_index] labels = [labels[i] for i in shuffled_index] logging.info('Found %d JPEG files across %d labels inside %s.' % (len(filenames), len(unique_labels), data_dir)) # print(labels) return filenames, texts, labels def _process_dataset(name, directory, num_shards, labels_file, command_args): """Process a complete data set and save it as a TFRecord. Args: name: string, unique identifier specifying the data set. directory: string, root path to the data set. num_shards: integer number of shards for this data set. labels_file: string, path to the labels file. """ filenames, texts, labels = _find_image_files(directory, labels_file, command_args) _process_image_files(name, filenames, texts, labels, num_shards, command_args) def check_and_set_default_args(command_args): if not (hasattr(command_args, 'train_shards')) or command_args.train_shards is None: command_args.train_shards = 5 if not (hasattr(command_args, 'validation_shards')) or command_args.validation_shards is None: command_args.validation_shards = 5 if not (hasattr(command_args, 'num_threads')) or command_args.num_threads is None: command_args.num_threads = 5 if not (hasattr(command_args, 'class_label_base')) or command_args.class_label_base is None: command_args.class_label_base = 0 if not (hasattr(command_args, 'dataset_name')) or command_args.dataset_name is None: command_args.dataset_name = '' assert not command_args.train_shards % command_args.num_threads, ( 'Please make the command_args.num_threads commensurate with command_args.train_shards') assert not command_args.validation_shards % command_args.num_threads, ( 'Please make the command_args.num_threads commensurate with ' 'command_args.validation_shards') assert command_args.train_directory is not None assert command_args.validation_directory is not None assert command_args.labels_file is not None assert command_args.output_directory is not None def main(command_args): """ command_args:須要有如下屬性: command_args.train_directory 訓練集所在的文件夾。這個文件夾下面,每一個文件夾的名字表明label名稱,再下面就是圖片。 command_args.validation_directory 驗證集所在的文件夾。這個文件夾下面,每一個文件夾的名字表明label名稱,再下面就是圖片。 command_args.labels_file 一個文件。每一行表明一個label名稱。 command_args.output_directory 一個文件夾,表示最後輸出的位置。 command_args.train_shards 將訓練集分紅多少份。 command_args.validation_shards 將驗證集分紅多少份。 command_args.num_threads 線程數。必須是上面兩個參數的約數。 command_args.class_label_base 很重要!真正的tfrecord中,每一個class的label號從多少開始,默認爲0(在models/slim中就是從0開始的) command_args.dataset_name 字符串,輸出的時候的前綴。 圖片不能夠有損壞。不然會致使線程提早退出。 """ check_and_set_default_args(command_args) logging.info('Saving results to %s' % command_args.output_directory) # Run it! _process_dataset('validation', command_args.validation_directory, command_args.validation_shards, command_args.labels_file, command_args) _process_dataset('train', command_args.train_directory, command_args.train_shards, command_args.labels_file, command_args)
data_convert.pygithub
# coding:utf-8 from __future__ import absolute_import import argparse import os import logging from src.tfrecord import main def parse_args(): parser = argparse.ArgumentParser() parser.add_argument('-t', '--tensorflow-data-dir', default='pic/') parser.add_argument('--train-shards', default=2, type=int) parser.add_argument('--validation-shards', default=2, type=int) parser.add_argument('--num-threads', default=2, type=int) parser.add_argument('--dataset-name', default='satellite', type=str) return parser.parse_args() if __name__ == '__main__': logging.basicConfig(level=logging.INFO) args = parse_args() args.tensorflow_dir = args.tensorflow_data_dir args.train_directory = os.path.join(args.tensorflow_dir, 'train') args.validation_directory = os.path.join(args.tensorflow_dir, 'validation') args.output_directory = args.tensorflow_dir args.labels_file = os.path.join(args.tensorflow_dir, 'label.txt') if os.path.exists(args.labels_file) is False: logging.warning('Can\'t find label.txt. Now create it.') all_entries = os.listdir(args.train_directory) dirnames = [] for entry in all_entries: if os.path.isdir(os.path.join(args.train_directory, entry)): dirnames.append(entry) with open(args.labels_file, 'w') as f: for dirname in dirnames: f.write(dirname + '\n') main(args)
這樣在pic文件夾下就會生成4個tfrecord文件和1個label.txt文件。spring
參考3.3.2小節對Slim源碼作修改。express
在slim文件夾下新建一個satellite目錄。在這個目錄下作下面幾件事情:apache
3.3.4 開始訓練網絡
(在slim文件夾下運行)訓練Logits層:
使用GPU問題
異常日誌:app
Cannot assign a device for operation 'InceptionV3/Conv2d_1a_3x3/weights/Initializer/truncated_normal/TruncatedNormal': Could not satisfy explicit device specification '' because the node was colocated with a group of nodes t hat required incompatible device '/device:GPU:0' 經分析,與GPU使用有關 解決方法: 1.不使用GPU 參考https://github.com/tensorflow/models/issues/1823 In the train_image_classifier.py script line 40 there is: tf.app.flags.DEFINE_boolean('clone_on_cpu', False, 'Use CPUs to deploy clones.') Turn this function to True if you will to use CPUs instead of GPUs which seems to be enabled by default.
python train_image_classifier.py \ --train_dir=satellite/train_dir \ --dataset_name=satellite \ --dataset_split_name=train \ --dataset_dir=satellite/data \ --model_name=inception_v3 \ --checkpoint_path=satellite/pretrained/inception_v3.ckpt \ --checkpoint_exclude_scopes=InceptionV3/Logits,InceptionV3/AuxLogits \ --trainable_scopes=InceptionV3/Logits,InceptionV3/AuxLogits \ --max_number_of_steps=100000 \ --batch_size=32 \ --learning_rate=0.001 \ --learning_rate_decay_type=fixed \ --save_interval_secs=300 \ --save_summaries_secs=2 \ --log_every_n_steps=10 \ --optimizer=rmsprop \ --weight_decay=0.00004
python train_image_classifier.py --train_dir=satellite/train_dir --dataset_name=satellite --dataset_split_name=train --dataset_dir=satellite/data --model_name=inception_v3 --checkpoint_path=satellite/pretrained/inception_v3.ckpt --checkpoint_exclude_scopes=InceptionV3/Logits,InceptionV3/AuxLogits --trainable_scopes=InceptionV3/Logits,InceptionV3/AuxLogits --max_number_of_steps=100000 --batch_size=32 --learning_rate=0.001 --learning_rate_decay_type=fixed --save_interval_secs=300 --save_summaries_secs=2 --log_every_n_steps=10 --optimizer=rmsprop --weight_decay=0.00004
訓練全部層:less
python train_image_classifier.py \ --train_dir=satellite/train_dir \ --dataset_name=satellite \ --dataset_split_name=train \ --dataset_dir=satellite/data \ --model_name=inception_v3 \ --checkpoint_path=satellite/pretrained/inception_v3.ckpt \ --checkpoint_exclude_scopes=InceptionV3/Logits,InceptionV3/AuxLogits \ --max_number_of_steps=100000 \ --batch_size=32 \ --learning_rate=0.001 \ --learning_rate_decay_type=fixed \ --save_interval_secs=300 \ --save_summaries_secs=10 \ --log_every_n_steps=1 \ --optimizer=rmsprop \ --weight_decay=0.00004
python train_image_classifier.py --train_dir=satellite/train_dir --dataset_name=satellite --dataset_split_name=train --dataset_dir=satellite/data --model_name=inception_v3 --checkpoint_path=satellite/pretrained/inception_v3.ckpt --checkpoint_exclude_scopes=InceptionV3/Logits,InceptionV3/AuxLogits --max_number_of_steps=100000 --batch_size=32 --learning_rate=0.001 --learning_rate_decay_type=fixed --save_interval_secs=300 --save_summaries_secs=10 --log_every_n_steps=1 --optimizer=rmsprop --weight_decay=0.00004
3.3.6 驗證模型準確率
在slim文件夾下運行:
python eval_image_classifier.py \ --checkpoint_path=satellite/train_dir \ --eval_dir=satellite/eval_dir \ --dataset_name=satellite \ --dataset_split_name=validation \ --dataset_dir=satellite/data \ --model_name=inception_v3
python eval_image_classifier.py --checkpoint_path=satellite/train_dir --eval_dir=satellite/eval_dir --dataset_name=satellite --dataset_split_name=validation --dataset_dir=satellite/data --model_name=inception_v3
3.3.7 TensorBoard 可視化與超參數選擇
打開TensorBoard:
tensorboard --logdir satellite/train_dir
3.3.8 導出模型並對單張圖片進行識別
在slim文件夾下運行:
python export_inference_graph.py \ --alsologtostderr \ --model_name=inception_v3 \ --output_file=satellite/inception_v3_inf_graph.pb \ --dataset_name satellite
python export_inference_graph.py --alsologtostderr --model_name=inception_v3 --output_file=satellite/inception_v3_inf_graph.pb --dataset_name satellite
在chapter_3文件夾下運行(需將5271改爲train_dir中保存的實際的模型訓練步數):
python freeze_graph.py \ --input_graph slim/satellite/inception_v3_inf_graph.pb \ --input_checkpoint slim/satellite/train_dir/model.ckpt-5271 \ --input_binary true \ --output_node_names InceptionV3/Predictions/Reshape_1 \ --output_graph slim/satellite/frozen_graph.pb
python freeze_graph.py --input_graph slim/satellite/inception_v3_inf_graph.pb --input_checkpoint slim/satellite/train_dir/model.ckpt-100 --input_binary true --output_node_names InceptionV3/Predictions/Reshape_1 --output_graph slim/satellite/frozen_graph.pb
運行導出模型分類單張圖片:
python classify_image_inception_v3.py \ --model_path slim/satellite/frozen_graph.pb \ --label_path data_prepare/pic/label.txt \ --image_file test_image.jpg
python classify_image_inception_v3.py --model_path slim/satellite/frozen_graph.pb --label_path data_prepare/pic/label.txt --image_file test_image.jpg