Hello everyone, @best_not_best here. This article is a continuation of First Deep Learning ~Struggle~. If you haven't read that one, please read it first. Sorry it took almost a year to post the sequel...
This article reflects my personal interests and is not the official view of the organization I belong to.
Machine / OS
MacBook Pro (Retina, 15-inch, Mid 2014)
OS X Yosemite 10.10.5
Python
Python package
lxml 3.6.0
selenium 3.0.2
numpy 1.11.2
opencv3 3.1.0 (installed with conda)
tensorflow 0.10.0
I run everything in Anaconda so that OpenCV can be used with Python 3.x.
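To confirm the Anaconda environment actually sees all of these packages, here is a quick version check (my own sketch, not part of the original workflow):

import cv2
import lxml.etree
import numpy
import selenium
import tensorflow as tf

# Print the version of each package this article depends on
print('opencv    :', cv2.__version__)
print('lxml      :', lxml.etree.__version__)
print('numpy     :', numpy.__version__)
print('selenium  :', selenium.__version__)
print('tensorflow:', tf.__version__)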
Please refer to Preparation for the details of the process.
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
import lxml.html
from selenium import webdriver
import os

target_url = 'http://hogehoge.co.jp/list.html'

driver = webdriver.PhantomJS(service_log_path=os.path.devnull)
driver.get(target_url)
root = lxml.html.fromstring(driver.page_source)
links = root.cssselect('td.text12m')
for link in links:
    # Print only cells whose text is a numeric employee ID
    if link.text is None:
        continue
    if link.text.isdigit():
        print(link.text)
driver.close()
driver.quit()
The employee IDs are written to standard output, so redirect them to a file (for example `python get_member_id.py > member_id.txt`; the script name here is just an example). From here on, this file is referred to as member_id.txt.
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
import os
import urllib.request
import urllib.parse
import time

# The employee ID file created above
ID_LIST = '/path/to/member_id.txt'
# Employee image URL format
URL_FMT = 'http://hogehoge.co.jp/%s.jpg'
# File save destination path format
OUTPUT_FMT = '/path/to/photo/%s.jpg'

opener = urllib.request.build_opener()
urllib.request.install_opener(opener)

for id in open(ID_LIST, 'r'):
    url = URL_FMT % (id.strip())
    try:
        img = urllib.request.urlopen(url, timeout=5).read()
        if len(img) == 0:
            continue
    except urllib.request.URLError:
        print(url, 'URLError')
    except IOError:
        print(url, 'IOError')
    except UnicodeEncodeError:
        print(url, 'EncodeError')
    except OSError:
        print(url, 'OSError')
    else:
        output = OUTPUT_FMT % id.strip()
        file = open(output, 'wb')
        file.write(img)
        file.close()
    time.sleep(0.1)
The images are saved with file names like the following.
000001.jpg
000002.jpg
000003.jpg
000004.jpg
000005.jpg
...
Please refer to Preparation for the details of the process.
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
import numpy
import os
import sys
import cv2

# Specify the definition file in the OpenCV package
CASCADE_PATH = '/path/to/versions/anaconda3-4.1.1/pkgs/opencv3-3.1.0-py35_0/share/OpenCV/haarcascades/haarcascade_frontalface_alt.xml'
# Directory containing the images saved in 1.
INPUT_DIR_PATH = '/path/to/photos/'
# Directory for storing cropped images
OUTPUT_DIR_PATH = '/path/to/cutout/'
# Image file name format
# Since multiple faces may be cut out from one image, a serial number is appended.
OUTPUT_FILE_FMT = '%s%s_%d%s'
COLOR = (255, 255, 255)

files = os.listdir(INPUT_DIR_PATH)
for file in files:
    input_image_path = INPUT_DIR_PATH + file
    # File reading
    image = cv2.imread(input_image_path)
    # Grayscale conversion
    try:
        image_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    except cv2.error:
        continue
    # Load the cascade classifier
    cascade = cv2.CascadeClassifier(CASCADE_PATH)
    # Execute object recognition (face detection)
    facerect = cascade.detectMultiScale(image_gray, scaleFactor=1.1, minNeighbors=1, minSize=(1, 1))
    if len(facerect) > 0:
        # Save the recognition results
        i = 1
        for rect in facerect:
            x = rect[0]
            y = rect[1]
            w = rect[2]
            h = rect[3]
            path, ext = os.path.splitext(os.path.basename(file))
            output_image_path = OUTPUT_FILE_FMT % (OUTPUT_DIR_PATH, path, i, ext)
            try:
                im = cv2.resize(image[y:y+h, x:x+w], (96, 96))
                cv2.imwrite(output_image_path, im)
            except cv2.error:
                print(file)
                continue
            i += 1
The cropped faces are saved with file names like the following.
000001_1.jpg
000002_1.jpg
000003_1.jpg
000003_2.jpg
000004_1.jpg
...
Please refer to Struggle for the details of the process.
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
import sys
import os
import json
import urllib.request
import urllib.parse
import requests
import mimetypes
import re

# API URL
BING_URL = 'https://api.datamarket.azure.com/Bing/Search/Image?'
# API ACCESS KEY
MS_ACCTKEY = 'hogehoge'
QUERY = 'The name of your favorite celebrity'
# Directory for saving acquired images
OUTPUT_DIR_PATH = '/path/to/talent/'

opener = urllib.request.build_opener()
urllib.request.install_opener(opener)

def download_urllist(urllist, skip):
    for url in urllist:
        try:
            img = urllib.request.urlopen(url, timeout=5).read()
            if len(img) == 0:
                continue
            # Guess the file extension from the MIME type (fall back to jpeg)
            url = re.sub(r'\?.*', '', url)
            mime_type = mimetypes.guess_type(url)[0]
            if mime_type is None:
                mime_type = 'jpeg'
            else:
                mime_type = mime_type.split('/')[1]
            file_name = '%s.%s' % (skip, mime_type)
            with open(OUTPUT_DIR_PATH + file_name, 'wb') as f:
                f.write(img)
        except urllib.request.URLError:
            print('URLError')
        except IOError:
            print('IOError')
        except UnicodeEncodeError:
            print('EncodeError')
        except OSError:
            print('OSError')
        skip += 1

if __name__ == "__main__":
    step = 20
    num = 50
    url_param_dict = {
        'Query': "'" + QUERY + "'",
        'Market': "'ja-JP'",
    }
    url_param_base = urllib.parse.urlencode(url_param_dict)
    url_param_base = url_param_base + '&$format=json&$top=%d&$skip=' % (num)
    # Fetch num results per request, paging through the results with $skip
    for skip in range(0, num * step, num):
        url_param = url_param_base + str(skip)
        url = BING_URL + url_param
        response = requests.get(url,
                                auth=(MS_ACCTKEY, MS_ACCTKEY),
                                headers={'User-Agent': 'My API Robot'})
        response = response.json()
        urllist = [item['MediaUrl'] for item in response['d']['results']]
        download_urllist(urllist, skip)
Please refer to Struggle for the details of the process.
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
import numpy
import os
import sys
import cv2

# Specify the definition file in the OpenCV package
CASCADE_PATH = '/path/to/versions/anaconda3-4.1.1/pkgs/opencv3-3.1.0-py35_0/share/OpenCV/haarcascades/haarcascade_frontalface_alt.xml'
# Directory containing the images saved in 3.
INPUT_DIR_PATH = '/path/to/talent/'
# Directory for storing cropped images
OUTPUT_DIR_PATH = '/path/to/talent_cutout/'
# Image file name format
# Since multiple faces may be cut out from one image, a serial number is appended.
OUTPUT_FILE_FMT = '%s%s_%d%s'
COLOR = (255, 255, 255)

files = os.listdir(INPUT_DIR_PATH)
for file in files:
    input_image_path = INPUT_DIR_PATH + file
    # File reading
    image = cv2.imread(input_image_path)
    # Grayscale conversion
    try:
        image_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    except cv2.error:
        continue
    # Load the cascade classifier
    cascade = cv2.CascadeClassifier(CASCADE_PATH)
    # Execute object recognition (face detection)
    facerect = cascade.detectMultiScale(image_gray, scaleFactor=1.1, minNeighbors=1, minSize=(1, 1))
    if len(facerect) > 0:
        # Save the recognition results
        i = 1
        for rect in facerect:
            x = rect[0]
            y = rect[1]
            w = rect[2]
            h = rect[3]
            path, ext = os.path.splitext(os.path.basename(file))
            output_image_path = OUTPUT_FILE_FMT % (OUTPUT_DIR_PATH, path, i, ext)
            try:
                im = cv2.resize(image[y:y+h, x:x+w], (96, 96))
                cv2.imwrite(output_image_path, im)
            except cv2.error:
                print(file)
                continue
            i += 1
The cropped faces are saved with file names like the following.
7_3.jpeg
6_1.jpeg
4_1.jpeg
3_1.jpeg
2_1.jpeg
...
Change the `QUERY` and `OUTPUT_DIR_PATH` of the program in 3. and run it again. This time, I ran it with `QUERY` set to "general public".
QUERY = 'Ordinary people'
OUTPUT_DIR_PATH = '/path/to/other_talent/'
I omit the code here because the process is the same as in 4.
Create the dataset. Label each image file of your favorite celebrity with "1" and shuffle the list randomly.
$ ls -la /path/to/talent_cutout/*.* | awk '{print $9" 1"}' | gsort -R > talent.txt
Split 80% into training data and 20% into test data. (Below, the split is 752 and 189 because there were 941 files in total.)
$ head -752 talent.txt > talent_train.txt
$ tail -189 talent.txt > talent_test.txt
Similarly, label the images that are not your favorite celebrity with "2" and split them into training data (commons_train.txt) and test data (commons_test.txt). Then concatenate the respective training and test files and shuffle each randomly.
$ cat commons_train.txt talent_train.txt | gsort -R > train.txt
$ cat commons_test.txt talent_test.txt | gsort -R > test.txt
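gsort here is GNU sort (on OS X it comes with the coreutils package). If you'd rather not depend on it, here is a minimal Python sketch of the same labeling, shuffling, and 80/20 split, using the same placeholder paths as above:

import glob
import random

# Label every cropped image of the favorite celebrity with "1"
# (use "2" for the "general public" crops)
lines = ['%s 1' % path for path in glob.glob('/path/to/talent_cutout/*.*')]
random.shuffle(lines)

# 80% training data, 20% test data
split = int(len(lines) * 0.8)
with open('talent_train.txt', 'w') as f:
    f.write('\n'.join(lines[:split]) + '\n')
with open('talent_test.txt', 'w') as f:
    f.write('\n'.join(lines[split:]) + '\n')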
The contents of the file are as follows.
$ head -5 train.txt
/path/to/other_talent_cutout/152_16.jpeg 2
/path/to/talent_cutout/371_1.jpg 1
/path/to/talent_cutout/349_1.jpg 1
/path/to/talent_cutout/523_2.jpg 1
/path/to/other_talent_cutout/348_2.jpeg 2
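Before training, it may be worth a quick sanity check that both labels actually appear in the shuffled files. A small sketch (my own addition):

from collections import Counter

# Count how many lines carry each label in train.txt
with open('/path/to/train.txt') as f:
    counts = Counter(line.split()[1] for line in f if line.strip())
print(counts)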
Now train the model with TensorFlow. I used the Qiita article "TensorFlow: To learn from a large number of images... ~(almost) solution~" as a reference.
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
import sys
import cv2
import numpy as np
import tensorflow as tf
import tensorflow.python.platform
NUM_CLASSES = 3
IMAGE_SIZE = 28
IMAGE_PIXELS = IMAGE_SIZE * IMAGE_SIZE * 3
flags = tf.app.flags
FLAGS = flags.FLAGS
# Path of the file to save the learning result
flags.DEFINE_string('save_model', '/path/to/model.ckpt', 'File name of model data')
# Training data path
flags.DEFINE_string('train', '/path/to/train.txt', 'File name of train data.')
# Test data path
flags.DEFINE_string('test', '/path/to/test.txt', 'File name of test data.')
flags.DEFINE_string('train_dir', './log_data', 'Directory to put the training data.')
flags.DEFINE_integer('max_steps', 100, 'Number of steps to run trainer.')
flags.DEFINE_integer(
    'batch_size',
    10,
    'Batch size'
    'Must divide evenly into the dataset sizes.'
)
flags.DEFINE_float('learning_rate', 1e-4, 'Initial learning rate.')
def inference(images_placeholder, keep_prob):
    def weight_variable(shape):
        initial = tf.truncated_normal(shape, stddev=0.1)
        return tf.Variable(initial)

    def bias_variable(shape):
        initial = tf.constant(0.1, shape=shape)
        return tf.Variable(initial)

    def conv2d(x, W):
        return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

    def max_pool_2x2(x):
        return tf.nn.max_pool(
            x,
            ksize=[1, 2, 2, 1],
            strides=[1, 2, 2, 1],
            padding='SAME'
        )

    x_images = tf.reshape(images_placeholder, [-1, IMAGE_SIZE, IMAGE_SIZE, 3])
    with tf.name_scope('conv1') as scope:
        W_conv1 = weight_variable([5, 5, 3, 32])
        b_conv1 = bias_variable([32])
        h_conv1 = tf.nn.relu(conv2d(x_images, W_conv1) + b_conv1)
    with tf.name_scope('pool1') as scope:
        h_pool1 = max_pool_2x2(h_conv1)
    with tf.name_scope('conv2') as scope:
        W_conv2 = weight_variable([5, 5, 32, 64])
        b_conv2 = bias_variable([64])
        h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
    with tf.name_scope('pool2') as scope:
        h_pool2 = max_pool_2x2(h_conv2)
    with tf.name_scope('fc1') as scope:
        W_fc1 = weight_variable([7 * 7 * 64, 1024])
        b_fc1 = bias_variable([1024])
        h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])
        h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
        h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
    with tf.name_scope('fc2') as scope:
        W_fc2 = weight_variable([1024, NUM_CLASSES])
        b_fc2 = bias_variable([NUM_CLASSES])
    with tf.name_scope('softmax') as scope:
        y_conv = tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)
    return y_conv

def loss(logits, labels):
    cross_entropy = -tf.reduce_sum(labels * tf.log(tf.clip_by_value(logits, 1e-10, 1.0)))
    tf.scalar_summary('cross_entropy', cross_entropy)
    return cross_entropy

def training(loss, learning_rate):
    train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)
    return train_step

def accuracy(logits, labels):
    correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(labels, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, 'float'))
    tf.scalar_summary('accuracy', accuracy)
    return accuracy
if __name__ == '__main__':
    with open(FLAGS.train, 'r') as f:  # train.txt
        train_image = []
        train_label = []
        for line in f:
            line = line.rstrip()
            l = line.split()
            img = cv2.imread(l[0])
            img = cv2.resize(img, (IMAGE_SIZE, IMAGE_SIZE))
            train_image.append(img.flatten().astype(np.float32) / 255.0)
            # One-hot label, e.g. label 1 -> [0, 1, 0]
            tmp = np.zeros(NUM_CLASSES)
            tmp[int(l[1])] = 1
            train_label.append(tmp)
        train_image = np.asarray(train_image)
        train_label = np.asarray(train_label)
        train_len = len(train_image)
    with open(FLAGS.test, 'r') as f:
        test_image = []
        test_label = []
        for line in f:
            line = line.rstrip()
            l = line.split()
            img = cv2.imread(l[0])
            img = cv2.resize(img, (IMAGE_SIZE, IMAGE_SIZE))
            test_image.append(img.flatten().astype(np.float32) / 255.0)
            tmp = np.zeros(NUM_CLASSES)
            tmp[int(l[1])] = 1
            test_label.append(tmp)
        test_image = np.asarray(test_image)
        test_label = np.asarray(test_label)
        test_len = len(test_image)

    with tf.Graph().as_default():
        images_placeholder = tf.placeholder('float', shape=(None, IMAGE_PIXELS))
        labels_placeholder = tf.placeholder('float', shape=(None, NUM_CLASSES))
        keep_prob = tf.placeholder('float')

        logits = inference(images_placeholder, keep_prob)
        loss_value = loss(logits, labels_placeholder)
        train_op = training(loss_value, FLAGS.learning_rate)
        acc = accuracy(logits, labels_placeholder)

        saver = tf.train.Saver()
        sess = tf.Session()
        sess.run(tf.initialize_all_variables())
        summary_op = tf.merge_all_summaries()
        summary_writer = tf.train.SummaryWriter(FLAGS.train_dir, sess.graph_def)

        if train_len % FLAGS.batch_size == 0:
            train_batch = train_len / FLAGS.batch_size
        else:
            train_batch = (train_len / FLAGS.batch_size) + 1
        print('train_batch = ' + str(train_batch))
        for step in range(FLAGS.max_steps):
            for i in range(int(train_batch)):
                batch = FLAGS.batch_size * i
                batch_plus = FLAGS.batch_size * (i + 1)
                if batch_plus > train_len:
                    batch_plus = train_len
                sess.run(train_op, feed_dict={
                    images_placeholder: train_image[batch:batch_plus],
                    labels_placeholder: train_label[batch:batch_plus],
                    keep_prob: 0.5
                })
            if step % 10 == 0:
                train_accuracy = 0.0
                for i in range(int(train_batch)):
                    batch = FLAGS.batch_size * i
                    batch_plus = FLAGS.batch_size * (i + 1)
                    if batch_plus > train_len:
                        batch_plus = train_len
                    train_accuracy += sess.run(acc, feed_dict={
                        images_placeholder: train_image[batch:batch_plus],
                        labels_placeholder: train_label[batch:batch_plus],
                        keep_prob: 1.0})
                    if i != 0:
                        train_accuracy /= 2.0
                print('step %d, training accuracy %g' % (step, train_accuracy))

        if test_len % FLAGS.batch_size == 0:
            test_batch = test_len / FLAGS.batch_size
        else:
            test_batch = (test_len / FLAGS.batch_size) + 1
        print('test_batch = ' + str(test_batch))
        test_accuracy = 0.0
        for i in range(int(test_batch)):
            batch = FLAGS.batch_size * i
            batch_plus = FLAGS.batch_size * (i + 1)
            if batch_plus > test_len:
                batch_plus = test_len
            test_accuracy += sess.run(
                acc,
                feed_dict={
                    images_placeholder: test_image[batch:batch_plus],
                    labels_placeholder: test_label[batch:batch_plus],
                    keep_prob: 1.0
                }
            )
            if i != 0:
                test_accuracy /= 2.0
        print('test accuracy %g' % (test_accuracy))
        save_path = saver.save(sess, FLAGS.save_model)
The learning result is saved to /path/to/model.ckpt.
Again, I referred to the Qiita article "TensorFlow: To learn from a large number of images... ~(almost) solution~".
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
import os
import sys
import numpy as np
import tensorflow as tf
import cv2
import tensorflow.python.platform
from types import *
NUM_CLASSES = 3
IMAGE_SIZE = 28
IMAGE_PIXELS = IMAGE_SIZE * IMAGE_SIZE * 3
# Directory containing the faces cropped in 2.
DIR_PATH = '/path/to/cutout/'
flags = tf.app.flags
FLAGS = flags.FLAGS
flags.DEFINE_string('readmodels', '/path/to/model.ckpt', 'File name of model data')
def inference(images_placeholder, keep_prob):
    def weight_variable(shape):
        initial = tf.truncated_normal(shape, stddev=0.1)
        return tf.Variable(initial)

    def bias_variable(shape):
        initial = tf.constant(0.1, shape=shape)
        return tf.Variable(initial)

    def conv2d(x, W):
        return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

    def max_pool_2x2(x):
        return tf.nn.max_pool(
            x,
            ksize=[1, 2, 2, 1],
            strides=[1, 2, 2, 1],
            padding='SAME'
        )

    x_image = tf.reshape(images_placeholder, [-1, IMAGE_SIZE, IMAGE_SIZE, 3])
    with tf.name_scope('conv1') as scope:
        W_conv1 = weight_variable([5, 5, 3, 32])
        b_conv1 = bias_variable([32])
        h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
    with tf.name_scope('pool1') as scope:
        h_pool1 = max_pool_2x2(h_conv1)
    with tf.name_scope('conv2') as scope:
        W_conv2 = weight_variable([5, 5, 32, 64])
        b_conv2 = bias_variable([64])
        h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
    with tf.name_scope('pool2') as scope:
        h_pool2 = max_pool_2x2(h_conv2)
    with tf.name_scope('fc1') as scope:
        W_fc1 = weight_variable([7 * 7 * 64, 1024])
        b_fc1 = bias_variable([1024])
        h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])
        h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
        h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
    with tf.name_scope('fc2') as scope:
        W_fc2 = weight_variable([1024, NUM_CLASSES])
        b_fc2 = bias_variable([NUM_CLASSES])
    with tf.name_scope('softmax') as scope:
        y_conv = tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)
    return y_conv
if __name__ == '__main__':
    test_image = []
    test_image_name = []
    files = os.listdir(DIR_PATH)
    for file in files:
        if file == '.DS_Store':
            continue
        img = cv2.imread(DIR_PATH + file)
        img = cv2.resize(img, (IMAGE_SIZE, IMAGE_SIZE))
        test_image.append(img.flatten().astype(np.float32) / 255.0)
        test_image_name.append(file)
    test_image = np.asarray(test_image)

    images_placeholder = tf.placeholder('float', shape=(None, IMAGE_PIXELS))
    labels_placeholder = tf.placeholder('float', shape=(None, NUM_CLASSES))
    keep_prob = tf.placeholder('float')

    logits = inference(images_placeholder, keep_prob)
    sess = tf.InteractiveSession()

    saver = tf.train.Saver()
    sess.run(tf.initialize_all_variables())
    saver.restore(sess, FLAGS.readmodels)

    for i in range(len(test_image)):
        pr = logits.eval(feed_dict={
            images_placeholder: [test_image[i]],
            keep_prob: 1.0
        })[0]
        pred = np.argmax(pr)
        if pred == 1:
            # When judged to be the favorite celebrity
            print('%s,%f' % (test_image_name[i], pr[pred] * 100.0))
The results are written to standard output, so redirect them to a file as appropriate. Sort them in descending order of score, and you're done!
$ cat result.csv | sort -r -t, -k 2 | head -5
1xxxx1_1.jpg,0.5406388165003011
1xxxx1_2.jpg,0.5350551152698707
1xxxx6_1.jpg,0.5310078821076752
1xxxx2_1.jpg,0.5183026050695199
1xxxx0_1.jpg,0.5130400958800978
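The same top-5 extraction can also be done in Python instead of the shell. A small sketch, assuming result.csv holds "file name,score" lines as above:

# Sort result.csv by the score column in descending order and show the top 5
with open('result.csv') as f:
    rows = [line.strip().split(',') for line in f if line.strip()]
rows.sort(key=lambda r: float(r[1]), reverse=True)
for name, score in rows[:5]:
    print('%s,%s' % (name, score))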
The results are a bit iffy, but I presented this as an example of what our subcommittee has been working on.