Hi, this is ikki. I finally solved the problem of feeding a large number of images to TensorFlow. Once I found the cause, it turned out to be really embarrassing... Copying and pasting code that you only half understand is not a good idea!
I previously posted a program that trains on images I prepared myself, but as written it could not train on tens of thousands of images.
So, first of all, I will post the modified program.
train.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import sys
import cv2
import numpy as np
import tensorflow as tf
import tensorflow.python.platform
# Number of target classes and the side length (in pixels) that every input
# image is resized to before being fed to the network.
NUM_CLASSES = 3
IMAGE_SIZE = 28
# Length of one flattened image: IMAGE_SIZE x IMAGE_SIZE pixels, 3 channels.
IMAGE_PIXELS = IMAGE_SIZE*IMAGE_SIZE*3

# Command-line flags (TensorFlow's flag wrapper).
flags = tf.app.flags
FLAGS = flags.FLAGS
flags.DEFINE_string('save_model', 'models/model.ckpt', 'File name of model data')
flags.DEFINE_string('train', 'training/train.txt', 'File name of train data')
flags.DEFINE_string('test', 'training/test.txt', 'File name of test data')
flags.DEFINE_string('train_dir', '/tmp/pict_data', 'Directory to put the training data.')
flags.DEFINE_integer('max_steps', 201, 'Number of steps to run trainer.')
# The two help-string literals used to be concatenated without a separator
# ("Batch sizeMust divide ..."), and the script does handle a short final
# batch, so the help text now says so.
flags.DEFINE_integer('batch_size', 256,
                     'Batch size. The final batch of a pass may be smaller '
                     'when the dataset size is not a multiple of it.')
flags.DEFINE_float('learning_rate', 1e-4, 'Initial learning rate.')
def inference(images_placeholder, keep_prob):
    """Build the convolutional network and return its softmax output.

    The graph is: reshape -> conv1 (5x5, 32) -> pool1 -> conv2 (5x5, 64)
    -> pool2 -> fc1 (1024) with dropout -> fc2 (NUM_CLASSES) -> softmax.

    Args:
        images_placeholder: placeholder holding flattened images; it is
            reshaped here to [-1, IMAGE_SIZE, IMAGE_SIZE, 3].
        keep_prob: placeholder for the keep probability handed to
            tf.nn.dropout.

    Returns:
        y_conv: the softmax output of the last layer, one row per image and
            NUM_CLASSES columns. These are probabilities, not raw logits.
    """
    def weight_variable(shape):
        # Weights start from a truncated normal with standard deviation 0.1.
        initial = tf.truncated_normal(shape, stddev=0.1)
        return tf.Variable(initial)

    def bias_variable(shape):
        # Biases start at the constant value 0.1.
        initial = tf.constant(0.1, shape=shape)
        return tf.Variable(initial)

    def conv2d(x, W):
        # Stride-1 convolution; SAME padding keeps the spatial size.
        return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

    def max_pool_2x2(x):
        # 2x2 max pooling with stride 2.
        return tf.nn.max_pool(x, ksize=[1, 2, 2, 1],
                              strides=[1, 2, 2, 1], padding='SAME')

    # Each stride-2 SAME pooling halves the side length, rounding up, so the
    # flattened size follows IMAGE_SIZE instead of being fixed at 7*7*64
    # (which is what this yields for IMAGE_SIZE = 28).
    pooled_side = (IMAGE_SIZE + 1) // 2
    pooled_side = (pooled_side + 1) // 2
    flat_size = pooled_side * pooled_side * 64

    # Restore the flattened input to IMAGE_SIZE x IMAGE_SIZE x 3.
    x_images = tf.reshape(images_placeholder, [-1, IMAGE_SIZE, IMAGE_SIZE, 3])

    # Convolution layer 1
    with tf.name_scope('conv1'):
        W_conv1 = weight_variable([5, 5, 3, 32])
        b_conv1 = bias_variable([32])
        h_conv1 = tf.nn.relu(conv2d(x_images, W_conv1) + b_conv1)

    # Pooling layer 1
    with tf.name_scope('pool1'):
        h_pool1 = max_pool_2x2(h_conv1)

    # Convolution layer 2
    with tf.name_scope('conv2'):
        W_conv2 = weight_variable([5, 5, 32, 64])
        b_conv2 = bias_variable([64])
        h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)

    # Pooling layer 2
    with tf.name_scope('pool2'):
        h_pool2 = max_pool_2x2(h_conv2)

    # Fully connected layer 1, followed by dropout
    with tf.name_scope('fc1'):
        W_fc1 = weight_variable([flat_size, 1024])
        b_fc1 = bias_variable([1024])
        h_pool2_flat = tf.reshape(h_pool2, [-1, flat_size])
        h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
        h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

    # Fully connected layer 2
    with tf.name_scope('fc2'):
        W_fc2 = weight_variable([1024, NUM_CLASSES])
        b_fc2 = bias_variable([NUM_CLASSES])

    # Normalisation with the softmax function
    with tf.name_scope('softmax'):
        y_conv = tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)

    return y_conv
def loss(logits, labels):
    """Compute the summed cross-entropy and register it as a scalar summary.

    Args:
        logits: float tensor, [batch_size, NUM_CLASSES]. The log is applied
            to it directly (after clipping), so it is treated as
            probabilities; the script passes the softmax output of
            inference() here.
        labels: label tensor, [batch_size, NUM_CLASSES].

    Returns:
        cross_entropy: scalar tensor, the negated sum of labels * log(logits).
    """
    # Clip into [1e-10, 1.0] so that log() never sees an exact zero.
    clipped = tf.clip_by_value(logits, 1e-10, 1.0)
    weighted_log = labels * tf.log(clipped)
    cross_entropy = -tf.reduce_sum(weighted_log)
    tf.scalar_summary("cross_entropy", cross_entropy)
    return cross_entropy
def training(loss, learning_rate):
    """Create the op that performs one Adam optimisation step.

    Args:
        loss: loss tensor, the result of loss().
        learning_rate: learning rate handed to the Adam optimizer.

    Returns:
        train_step: the op that minimises `loss`.
    """
    optimizer = tf.train.AdamOptimizer(learning_rate)
    return optimizer.minimize(loss)
def accuracy(logits, labels):
    """Compute the fraction of correct predictions and register it as a summary.

    Args:
        logits: result of inference(), [batch_size, NUM_CLASSES].
        labels: label tensor, [batch_size, NUM_CLASSES].

    Returns:
        Scalar float tensor: the mean over the batch of "argmax of the
        prediction equals argmax of the label".
    """
    predicted_class = tf.argmax(logits, 1)
    expected_class = tf.argmax(labels, 1)
    is_correct = tf.equal(predicted_class, expected_class)
    mean_correct = tf.reduce_mean(tf.cast(is_correct, "float"))
    tf.scalar_summary("accuracy", mean_correct)
    return mean_correct
def batch_ranges(total, batch_size):
    """Return the (start, stop) slice bounds that split `total` items into batches.

    Every batch spans `batch_size` items except possibly the last one, whose
    stop index is clamped to `total`.

    Args:
        total: number of items to cover.
        batch_size: maximum number of items per batch; must be positive.

    Returns:
        A list of (start, stop) tuples; empty when `total` is 0.

    Raises:
        ValueError: if `batch_size` is not positive.
    """
    if batch_size <= 0:
        raise ValueError("batch_size must be positive, got %r" % (batch_size,))
    return [(start, min(start + batch_size, total))
            for start in range(0, total, batch_size)]


def load_dataset(list_path):
    """Load the images and one-hot labels listed in a text file.

    Each non-blank line holds an image path and a class number separated by
    whitespace. Images are resized to IMAGE_SIZE x IMAGE_SIZE, flattened and
    scaled to [0, 1] as float32.

    Args:
        list_path: path of the list file (e.g. train.txt or test.txt).

    Returns:
        (images, labels): two numpy arrays with one row per listed image.

    Raises:
        ValueError: if a line does not contain both a path and a class number.
        IOError: if an image cannot be read.
    """
    images = []
    labels = []
    with open(list_path, 'r') as f:
        for line in f:
            fields = line.split()
            if not fields:
                continue  # tolerate blank lines, e.g. at the end of the file
            if len(fields) < 2:
                raise ValueError("Expected '<image path> <class>' in %s, got: %r"
                                 % (list_path, line))
            img = cv2.imread(fields[0])
            if img is None:
                # cv2.imread reports failure by returning None; fail here with
                # the file name rather than later inside cv2.resize.
                raise IOError("Could not read image file: %s" % fields[0])
            img = cv2.resize(img, (IMAGE_SIZE, IMAGE_SIZE))
            images.append(img.flatten().astype(np.float32)/255.0)
            one_hot = np.zeros(NUM_CLASSES)
            one_hot[int(fields[1])] = 1
            labels.append(one_hot)
    return np.asarray(images), np.asarray(labels)


def evaluate_accuracy(sess, acc_op, images_ph, labels_ph, keep_prob_ph,
                      images, labels, batch_size):
    """Evaluate `acc_op` batch by batch and return the accuracy over all items.

    Each batch result is weighted by the number of items in that batch, so
    the value equals the accuracy over the whole data set even when the last
    batch is shorter. Dropout is disabled (keep probability 1.0).

    Args:
        sess: session used to run the op.
        acc_op: op returning the mean accuracy of the fed batch.
        images_ph, labels_ph, keep_prob_ph: placeholders to feed.
        images, labels: the data set to evaluate.
        batch_size: maximum number of items fed at once.

    Returns:
        The accuracy as a float, or 0.0 when the data set is empty.
    """
    total = len(images)
    if total == 0:
        return 0.0
    correct = 0.0
    for start, stop in batch_ranges(total, batch_size):
        batch_accuracy = sess.run(acc_op, feed_dict={
            images_ph: images[start:stop],
            labels_ph: labels[start:stop],
            keep_prob_ph: 1.0})
        correct += float(batch_accuracy) * (stop - start)
    return correct / total


if __name__ == '__main__':
    # Load both data sets completely into memory.
    train_image, train_label = load_dataset(FLAGS.train)  # train.txt
    test_image, test_label = load_dataset(FLAGS.test)  # test.txt

    with tf.Graph().as_default():
        images_placeholder = tf.placeholder("float", shape=(None, IMAGE_PIXELS))
        labels_placeholder = tf.placeholder("float", shape=(None, NUM_CLASSES))
        keep_prob = tf.placeholder("float")

        logits = inference(images_placeholder, keep_prob)
        loss_value = loss(logits, labels_placeholder)
        train_op = training(loss_value, FLAGS.learning_rate)
        acc = accuracy(logits, labels_placeholder)

        saver = tf.train.Saver()
        sess = tf.Session()
        try:
            sess.run(tf.initialize_all_variables())
            # NOTE: summary_op is built but not run. Running it would need the
            # whole training set in a single feed, which is what this batched
            # version avoids.
            summary_op = tf.merge_all_summaries()
            summary_writer = tf.train.SummaryWriter(FLAGS.train_dir, sess.graph_def)

            # Training
            train_ranges = batch_ranges(len(train_image), FLAGS.batch_size)
            print("train_batch = " + str(len(train_ranges)))
            for step in range(FLAGS.max_steps):
                for start, stop in train_ranges:
                    # feed_dict supplies the data for the placeholders
                    sess.run(train_op, feed_dict={
                        images_placeholder: train_image[start:stop],
                        labels_placeholder: train_label[start:stop],
                        keep_prob: 0.5})

                if step % 10 == 0:
                    train_accuracy = evaluate_accuracy(
                        sess, acc, images_placeholder, labels_placeholder,
                        keep_prob, train_image, train_label, FLAGS.batch_size)
                    print("step %d, training accuracy %g" % (step, train_accuracy))

            # Evaluation on the test set; batches are bounded by the test set's
            # own length.
            test_ranges = batch_ranges(len(test_image), FLAGS.batch_size)
            print("test_batch = " + str(len(test_ranges)))
            test_accuracy = evaluate_accuracy(
                sess, acc, images_placeholder, labels_placeholder,
                keep_prob, test_image, test_label, FLAGS.batch_size)
            print("test accuracy %g" % (test_accuracy))

            saver.save(sess, FLAGS.save_model)
        finally:
            sess.close()
For the time being, I think it works if you copy and paste it. (But copy and paste only after you understand it!) There are some minor corrections, but the main change that led to the solution is that train_accuracy and test_accuracy, which calculate the accuracy, are now computed in batches. Why did I use batches for training but not for the accuracy calculation... ~~Some experts will probably laugh when they see the earlier program and say, "Why would you do that? Of course you will get a memory error, lol." In fact, I laughed the moment I noticed.~~ Self-deprecation aside, it will take time when you feed it a lot of images, but I think it will train properly.
I will write a little more about how to use this program.
Place train.txt and test.txt in the same folder as this train.py (or in a subfolder of it, such as the training folder described below). The contents of each text file are as follows.
lang:train.txt&test.txt
/home/picture/images/image1.jpg 0
/home/picture/images/image2.jpg 0
/home/picture/images/image3.jpg 1
/home/picture/images/image4.jpg 2
/home/picture/images/image5.jpg 1
/home/picture/images/image6.jpg 1
:
:
It looks like that. This time the images are classified into 3 classes, so each line is
"image file path" followed by "class number (0 to 2)".
I always write absolute paths in such cases. This text file is tedious to write by hand, so let's write a program to generate it automatically. I can post one if requested...
Also, this time I created a folder called training in the same folder as train.py and put both text files in it.
In addition, if you create a folder called models, a .ckpt file containing the training results will be saved there.
This is just an example.
test.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import sys
import numpy as np
import tensorflow as tf
import cv2
import tensorflow.python.platform
from types import *
# Network dimensions; these must match the values used when the model was trained.
NUM_CLASSES = 3
IMAGE_SIZE = 28
# One flattened image: IMAGE_SIZE x IMAGE_SIZE pixels with 3 channels each.
IMAGE_PIXELS = 3 * IMAGE_SIZE * IMAGE_SIZE

# Command-line flag: the checkpoint to restore.
flags = tf.app.flags
flags.DEFINE_string('readmodels', 'models/model.ckpt', 'File name of model data')
FLAGS = flags.FLAGS
def inference(images_placeholder, keep_prob):
    """Rebuild the classification network and return its softmax output.

    The layers and the order in which variables are created (weight, then
    bias, inside each name scope) must stay the same as in the training
    script, otherwise the saved checkpoint will not match this graph.

    Args:
        images_placeholder: placeholder holding flattened images; it is
            reshaped here to [-1, IMAGE_SIZE, IMAGE_SIZE, 3].
        keep_prob: placeholder for the keep probability handed to
            tf.nn.dropout.

    Returns:
        y_conv: the softmax output of the last layer, one row per image and
            NUM_CLASSES columns.
    """
    def weight_variable(shape):
        # Truncated-normal initial value (stddev 0.1).
        initial = tf.truncated_normal(shape, stddev=0.1)
        return tf.Variable(initial)

    def bias_variable(shape):
        # Constant initial value 0.1.
        initial = tf.constant(0.1, shape=shape)
        return tf.Variable(initial)

    def conv2d(x, W):
        return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

    def max_pool_2x2(x):
        return tf.nn.max_pool(x, ksize=[1, 2, 2, 1],
                              strides=[1, 2, 2, 1], padding='SAME')

    # Each stride-2 SAME pooling halves the side length, rounding up, so the
    # flattened size follows IMAGE_SIZE instead of being fixed at 7*7*64
    # (which is what this yields for IMAGE_SIZE = 28).
    pooled_side = (IMAGE_SIZE + 1) // 2
    pooled_side = (pooled_side + 1) // 2
    flat_size = pooled_side * pooled_side * 64

    x_image = tf.reshape(images_placeholder, [-1, IMAGE_SIZE, IMAGE_SIZE, 3])

    with tf.name_scope('conv1'):
        W_conv1 = weight_variable([5, 5, 3, 32])
        b_conv1 = bias_variable([32])
        h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)

    with tf.name_scope('pool1'):
        h_pool1 = max_pool_2x2(h_conv1)

    with tf.name_scope('conv2'):
        W_conv2 = weight_variable([5, 5, 32, 64])
        b_conv2 = bias_variable([64])
        h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)

    with tf.name_scope('pool2'):
        h_pool2 = max_pool_2x2(h_conv2)

    with tf.name_scope('fc1'):
        W_fc1 = weight_variable([flat_size, 1024])
        b_fc1 = bias_variable([1024])
        h_pool2_flat = tf.reshape(h_pool2, [-1, flat_size])
        h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
        h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

    with tf.name_scope('fc2'):
        W_fc2 = weight_variable([1024, NUM_CLASSES])
        b_fc2 = bias_variable([NUM_CLASSES])

    with tf.name_scope('softmax'):
        y_conv = tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)

    return y_conv
if __name__ == '__main__':
    # Every command-line argument is treated as the path of an image to classify.
    test_image = []
    for image_path in sys.argv[1:]:
        img = cv2.imread(image_path)
        if img is None:
            # cv2.imread reports failure by returning None; fail here with the
            # file name rather than later inside cv2.resize.
            raise IOError("Could not read image file: %s" % image_path)
        print(img)
        img = cv2.resize(img, (IMAGE_SIZE, IMAGE_SIZE))
        test_image.append(img.flatten().astype(np.float32)/255.0)
    test_image = np.asarray(test_image)

    # Only the image and dropout placeholders are needed for prediction.
    images_placeholder = tf.placeholder("float", shape=(None, IMAGE_PIXELS))
    keep_prob = tf.placeholder("float")
    logits = inference(images_placeholder, keep_prob)

    # InteractiveSession installs itself as the default session, which is
    # what logits.eval() below relies on.
    sess = tf.InteractiveSession()
    try:
        saver = tf.train.Saver()
        sess.run(tf.initialize_all_variables())
        saver.restore(sess, FLAGS.readmodels)

        for image in test_image:
            # Feed one image at a time with dropout disabled; [0] takes the
            # single row of class scores.
            pr = logits.eval(feed_dict={
                images_placeholder: [image],
                keep_prob: 1.0})[0]
            pred = np.argmax(pr)
            print(pr)
            print(pred)
    finally:
        sess.close()
    print("finish")
python test.py <image file>
If you run it as shown above, you should get the recognition result. I don't think this needs any particular explanation.
Actually, train.py is also set up for TensorBoard, but I am struggling because the summary part that draws the graph cannot be processed in batches.
The problem is that the value returned by sess.run(summary_op, ...) is of type str and contains opaque values meant for TensorBoard.
** In other words, it is not possible to "add up the results calculated for each batch and plot their average". **
Please let me know if you have a solution or notice anything...
Recommended Posts