Hello everyone, @best_not_best here. This article is a continuation of First Deep Learning ~Struggle~. If you haven't read that one, please read it first. Sorry it took almost a year to post the sequel...
This article reflects my personal interests and is not the official view of the organization I belong to.
Machine / OS
MacBook Pro (Retina, 15-inch, Mid 2014)
OS X Yosemite 10.10.5
Python
Python package
lxml 3.6.0
selenium 3.0.2
numpy 1.11.2
opencv3 3.1.0 (installed with conda)
tensorflow 0.10.0
I run everything in Anaconda so that OpenCV can be used with Python 3.x.
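To confirm the Anaconda environment actually sees all of these packages, here is a quick version check (my own sketch, not part of the original workflow):

import cv2
import lxml.etree
import numpy
import selenium
import tensorflow as tf

# Print the version of each package this article depends on
print('opencv    :', cv2.__version__)
print('lxml      :', lxml.etree.__version__)
print('numpy     :', numpy.__version__)
print('selenium  :', selenium.__version__)
print('tensorflow:', tf.__version__)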
Please refer to Preparation for the details of the process.
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
import lxml.html
from selenium import webdriver
import os

target_url = 'http://hogehoge.co.jp/list.html'

driver = webdriver.PhantomJS(service_log_path=os.path.devnull)
driver.get(target_url)
root = lxml.html.fromstring(driver.page_source)
links = root.cssselect('td.text12m')
for link in links:
    # Print only cells whose text is a numeric employee ID
    if link.text is None:
        continue
    if link.text.isdigit():
        print(link.text)
driver.close()
driver.quit()
The employee IDs are written to standard output, so redirect them to a file (for example `python get_member_id.py > member_id.txt`; the script name here is just an example). From here on, this file is referred to as member_id.txt.
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
import os
import urllib.request
import urllib.parse
import time

# The employee ID file created above
ID_LIST = '/path/to/member_id.txt'
# Employee image URL format
URL_FMT = 'http://hogehoge.co.jp/%s.jpg'
# File save destination path format
OUTPUT_FMT = '/path/to/photo/%s.jpg'

opener = urllib.request.build_opener()
urllib.request.install_opener(opener)

for id in open(ID_LIST, 'r'):
    url = URL_FMT % (id.strip())
    try:
        img = urllib.request.urlopen(url, timeout=5).read()
        if len(img) == 0:
            continue
    except urllib.request.URLError:
        print(url, 'URLError')
    except IOError:
        print(url, 'IOError')
    except UnicodeEncodeError:
        print(url, 'EncodeError')
    except OSError:
        print(url, 'OSError')
    else:
        output = OUTPUT_FMT % id.strip()
        file = open(output, 'wb')
        file.write(img)
        file.close()
    time.sleep(0.1)
The images are saved with file names like the following.
000001.jpg
000002.jpg
000003.jpg
000004.jpg
000005.jpg
...
Please refer to Preparation for the details of the process.
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
import numpy
import os
import sys
import cv2

# Specify the definition file in the OpenCV package
CASCADE_PATH = '/path/to/versions/anaconda3-4.1.1/pkgs/opencv3-3.1.0-py35_0/share/OpenCV/haarcascades/haarcascade_frontalface_alt.xml'
# Directory containing the images saved in 1.
INPUT_DIR_PATH = '/path/to/photos/'
# Directory for storing cropped images
OUTPUT_DIR_PATH = '/path/to/cutout/'
# Image file name format
# Since multiple faces may be cut out from one image, a serial number is appended.
OUTPUT_FILE_FMT = '%s%s_%d%s'
COLOR = (255, 255, 255)

files = os.listdir(INPUT_DIR_PATH)
for file in files:
    input_image_path = INPUT_DIR_PATH + file
    # File reading
    image = cv2.imread(input_image_path)
    # Grayscale conversion
    try:
        image_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    except cv2.error:
        continue
    # Load the cascade classifier
    cascade = cv2.CascadeClassifier(CASCADE_PATH)
    # Execute object recognition (face detection)
    facerect = cascade.detectMultiScale(image_gray, scaleFactor=1.1, minNeighbors=1, minSize=(1, 1))
    if len(facerect) > 0:
        # Save the recognition results
        i = 1
        for rect in facerect:
            x = rect[0]
            y = rect[1]
            w = rect[2]
            h = rect[3]
            path, ext = os.path.splitext(os.path.basename(file))
            output_image_path = OUTPUT_FILE_FMT % (OUTPUT_DIR_PATH, path, i, ext)
            try:
                im = cv2.resize(image[y:y+h, x:x+w], (96, 96))
                cv2.imwrite(output_image_path, im)
            except cv2.error:
                print(file)
                continue
            i += 1
The cropped faces are saved with file names like the following.
000001_1.jpg
000002_1.jpg
000003_1.jpg
000003_2.jpg
000004_1.jpg
...
Please refer to Struggle for the details of the process.
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
import sys
import os
import json
import urllib.request
import urllib.parse
import requests
import mimetypes
import re

# API URL
BING_URL = 'https://api.datamarket.azure.com/Bing/Search/Image?'
# API ACCESS KEY
MS_ACCTKEY = 'hogehoge'
QUERY = 'The name of your favorite celebrity'
# Directory for saving acquired images
OUTPUT_DIR_PATH = '/path/to/talent/'

opener = urllib.request.build_opener()
urllib.request.install_opener(opener)

def download_urllist(urllist, skip):
    for url in urllist:
        try:
            img = urllib.request.urlopen(url, timeout=5).read()
            if len(img) == 0:
                continue
            # Guess the file extension from the MIME type (fall back to jpeg)
            url = re.sub(r'\?.*', '', url)
            mime_type = mimetypes.guess_type(url)[0]
            if mime_type is None:
                mime_type = 'jpeg'
            else:
                mime_type = mime_type.split('/')[1]
            file_name = '%s.%s' % (skip, mime_type)
            with open(OUTPUT_DIR_PATH + file_name, 'wb') as f:
                f.write(img)
        except urllib.request.URLError:
            print('URLError')
        except IOError:
            print('IOError')
        except UnicodeEncodeError:
            print('EncodeError')
        except OSError:
            print('OSError')
        skip += 1

if __name__ == "__main__":
    step = 20
    num = 50
    url_param_dict = {
        'Query': "'" + QUERY + "'",
        'Market': "'ja-JP'",
    }
    url_param_base = urllib.parse.urlencode(url_param_dict)
    url_param_base = url_param_base + '&$format=json&$top=%d&$skip=' % (num)
    # Fetch num results per request, paging through the results with $skip
    for skip in range(0, num * step, num):
        url_param = url_param_base + str(skip)
        url = BING_URL + url_param
        response = requests.get(url,
                                auth=(MS_ACCTKEY, MS_ACCTKEY),
                                headers={'User-Agent': 'My API Robot'})
        response = response.json()
        urllist = [item['MediaUrl'] for item in response['d']['results']]
        download_urllist(urllist, skip)
Please refer to Struggle for the details of the process.
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
import numpy
import os
import sys
import cv2

# Specify the definition file in the OpenCV package
CASCADE_PATH = '/path/to/versions/anaconda3-4.1.1/pkgs/opencv3-3.1.0-py35_0/share/OpenCV/haarcascades/haarcascade_frontalface_alt.xml'
# Directory containing the images saved in 3.
INPUT_DIR_PATH = '/path/to/talent/'
# Directory for storing cropped images
OUTPUT_DIR_PATH = '/path/to/talent_cutout/'
# Image file name format
# Since multiple faces may be cut out from one image, a serial number is appended.
OUTPUT_FILE_FMT = '%s%s_%d%s'
COLOR = (255, 255, 255)

files = os.listdir(INPUT_DIR_PATH)
for file in files:
    input_image_path = INPUT_DIR_PATH + file
    # File reading
    image = cv2.imread(input_image_path)
    # Grayscale conversion
    try:
        image_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    except cv2.error:
        continue
    # Load the cascade classifier
    cascade = cv2.CascadeClassifier(CASCADE_PATH)
    # Execute object recognition (face detection)
    facerect = cascade.detectMultiScale(image_gray, scaleFactor=1.1, minNeighbors=1, minSize=(1, 1))
    if len(facerect) > 0:
        # Save the recognition results
        i = 1
        for rect in facerect:
            x = rect[0]
            y = rect[1]
            w = rect[2]
            h = rect[3]
            path, ext = os.path.splitext(os.path.basename(file))
            output_image_path = OUTPUT_FILE_FMT % (OUTPUT_DIR_PATH, path, i, ext)
            try:
                im = cv2.resize(image[y:y+h, x:x+w], (96, 96))
                cv2.imwrite(output_image_path, im)
            except cv2.error:
                print(file)
                continue
            i += 1
The cropped faces are saved with file names like the following.
7_3.jpeg
6_1.jpeg
4_1.jpeg
3_1.jpeg
2_1.jpeg
...
Change the `QUERY` and `OUTPUT_DIR_PATH` of the program in 3. and run it again. This time, I ran it with `QUERY` set to "general public".
QUERY = 'Ordinary people'
OUTPUT_DIR_PATH = '/path/to/other_talent/'
I omit the code here because the process is the same as in 4.
Create the dataset. Label each image file of your favorite celebrity with "1" and shuffle the list randomly.
$ ls -la /path/to/talent_cutout/*.* | awk '{print $9" 1"}' | gsort -R > talent.txt
Split 80% into training data and 20% into test data. (Below, the split is 752 and 189 because there were 941 files in total.)
$ head -752 talent.txt > talent_train.txt
$ tail -189 talent.txt > talent_test.txt
Similarly, label the images that are not your favorite celebrity with "2" and split them into training data (commons_train.txt) and test data (commons_test.txt). Then concatenate the respective training and test files and shuffle each randomly.
$ cat commons_train.txt talent_train.txt | gsort -R > train.txt
$ cat commons_test.txt talent_test.txt | gsort -R > test.txt
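gsort here is GNU sort (on OS X it comes with the coreutils package). If you'd rather not depend on it, here is a minimal Python sketch of the same labeling, shuffling, and 80/20 split, using the same placeholder paths as above:

import glob
import random

# Label every cropped image of the favorite celebrity with "1"
# (use "2" for the "general public" crops)
lines = ['%s 1' % path for path in glob.glob('/path/to/talent_cutout/*.*')]
random.shuffle(lines)

# 80% training data, 20% test data
split = int(len(lines) * 0.8)
with open('talent_train.txt', 'w') as f:
    f.write('\n'.join(lines[:split]) + '\n')
with open('talent_test.txt', 'w') as f:
    f.write('\n'.join(lines[split:]) + '\n')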
The contents of the file are as follows.
$ head -5 train.txt
/path/to/other_talent_cutout/152_16.jpeg 2
/path/to/talent_cutout/371_1.jpg 1
/path/to/talent_cutout/349_1.jpg 1
/path/to/talent_cutout/523_2.jpg 1
/path/to/other_talent_cutout/348_2.jpeg 2
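Before training, it may be worth a quick sanity check that both labels actually appear in the shuffled files. A small sketch (my own addition):

from collections import Counter

# Count how many lines carry each label in train.txt
with open('/path/to/train.txt') as f:
    counts = Counter(line.split()[1] for line in f if line.strip())
print(counts)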
Now train the model with TensorFlow. I used the Qiita article "TensorFlow: To learn from a large number of images... ~(almost) solution~" as a reference.
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
import sys
import cv2
import numpy as np
import tensorflow as tf
import tensorflow.python.platform
NUM_CLASSES = 3
IMAGE_SIZE = 28
IMAGE_PIXELS = IMAGE_SIZE * IMAGE_SIZE * 3
flags = tf.app.flags
FLAGS = flags.FLAGS
# Path of the file to save the learning result
flags.DEFINE_string('save_model', '/path/to/model.ckpt', 'File name of model data')
# Training data path
flags.DEFINE_string('train', '/path/to/train.txt', 'File name of train data.')
# Test data path
flags.DEFINE_string('test', '/path/to/test.txt', 'File name of test data.')
flags.DEFINE_string('train_dir', './log_data', 'Directory to put the training data.')
flags.DEFINE_integer('max_steps', 100, 'Number of steps to run trainer.')
flags.DEFINE_integer(
    'batch_size',
    10,
    'Batch size'
    'Must divide evenly into the dataset sizes.'
)
flags.DEFINE_float('learning_rate', 1e-4, 'Initial learning rate.')
def inference(images_placeholder, keep_prob):
    def weight_variable(shape):
        initial = tf.truncated_normal(shape, stddev=0.1)
        return tf.Variable(initial)

    def bias_variable(shape):
        initial = tf.constant(0.1, shape=shape)
        return tf.Variable(initial)

    def conv2d(x, W):
        return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

    def max_pool_2x2(x):
        return tf.nn.max_pool(
            x,
            ksize=[1, 2, 2, 1],
            strides=[1, 2, 2, 1],
            padding='SAME'
        )

    x_images = tf.reshape(images_placeholder, [-1, IMAGE_SIZE, IMAGE_SIZE, 3])
    with tf.name_scope('conv1') as scope:
        W_conv1 = weight_variable([5, 5, 3, 32])
        b_conv1 = bias_variable([32])
        h_conv1 = tf.nn.relu(conv2d(x_images, W_conv1) + b_conv1)
    with tf.name_scope('pool1') as scope:
        h_pool1 = max_pool_2x2(h_conv1)
    with tf.name_scope('conv2') as scope:
        W_conv2 = weight_variable([5, 5, 32, 64])
        b_conv2 = bias_variable([64])
        h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
    with tf.name_scope('pool2') as scope:
        h_pool2 = max_pool_2x2(h_conv2)
    with tf.name_scope('fc1') as scope:
        W_fc1 = weight_variable([7 * 7 * 64, 1024])
        b_fc1 = bias_variable([1024])
        h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])
        h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
        h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
    with tf.name_scope('fc2') as scope:
        W_fc2 = weight_variable([1024, NUM_CLASSES])
        b_fc2 = bias_variable([NUM_CLASSES])
    with tf.name_scope('softmax') as scope:
        y_conv = tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)
    return y_conv

def loss(logits, labels):
    cross_entropy = -tf.reduce_sum(labels * tf.log(tf.clip_by_value(logits, 1e-10, 1.0)))
    tf.scalar_summary('cross_entropy', cross_entropy)
    return cross_entropy

def training(loss, learning_rate):
    train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)
    return train_step

def accuracy(logits, labels):
    correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(labels, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, 'float'))
    tf.scalar_summary('accuracy', accuracy)
    return accuracy
if __name__ == '__main__':
    with open(FLAGS.train, 'r') as f:  # train.txt
        train_image = []
        train_label = []
        for line in f:
            line = line.rstrip()
            l = line.split()
            img = cv2.imread(l[0])
            img = cv2.resize(img, (IMAGE_SIZE, IMAGE_SIZE))
            train_image.append(img.flatten().astype(np.float32) / 255.0)
            # One-hot label, e.g. label 1 -> [0, 1, 0]
            tmp = np.zeros(NUM_CLASSES)
            tmp[int(l[1])] = 1
            train_label.append(tmp)
        train_image = np.asarray(train_image)
        train_label = np.asarray(train_label)
        train_len = len(train_image)
    with open(FLAGS.test, 'r') as f:
        test_image = []
        test_label = []
        for line in f:
            line = line.rstrip()
            l = line.split()
            img = cv2.imread(l[0])
            img = cv2.resize(img, (IMAGE_SIZE, IMAGE_SIZE))
            test_image.append(img.flatten().astype(np.float32) / 255.0)
            tmp = np.zeros(NUM_CLASSES)
            tmp[int(l[1])] = 1
            test_label.append(tmp)
        test_image = np.asarray(test_image)
        test_label = np.asarray(test_label)
        test_len = len(test_image)

    with tf.Graph().as_default():
        images_placeholder = tf.placeholder('float', shape=(None, IMAGE_PIXELS))
        labels_placeholder = tf.placeholder('float', shape=(None, NUM_CLASSES))
        keep_prob = tf.placeholder('float')

        logits = inference(images_placeholder, keep_prob)
        loss_value = loss(logits, labels_placeholder)
        train_op = training(loss_value, FLAGS.learning_rate)
        acc = accuracy(logits, labels_placeholder)

        saver = tf.train.Saver()
        sess = tf.Session()
        sess.run(tf.initialize_all_variables())
        summary_op = tf.merge_all_summaries()
        summary_writer = tf.train.SummaryWriter(FLAGS.train_dir, sess.graph_def)

        if train_len % FLAGS.batch_size == 0:
            train_batch = train_len / FLAGS.batch_size
        else:
            train_batch = (train_len / FLAGS.batch_size) + 1
        print('train_batch = ' + str(train_batch))
        for step in range(FLAGS.max_steps):
            for i in range(int(train_batch)):
                batch = FLAGS.batch_size * i
                batch_plus = FLAGS.batch_size * (i + 1)
                if batch_plus > train_len:
                    batch_plus = train_len
                sess.run(train_op, feed_dict={
                    images_placeholder: train_image[batch:batch_plus],
                    labels_placeholder: train_label[batch:batch_plus],
                    keep_prob: 0.5
                })
            if step % 10 == 0:
                train_accuracy = 0.0
                for i in range(int(train_batch)):
                    batch = FLAGS.batch_size * i
                    batch_plus = FLAGS.batch_size * (i + 1)
                    if batch_plus > train_len:
                        batch_plus = train_len
                    train_accuracy += sess.run(acc, feed_dict={
                        images_placeholder: train_image[batch:batch_plus],
                        labels_placeholder: train_label[batch:batch_plus],
                        keep_prob: 1.0})
                    if i != 0:
                        train_accuracy /= 2.0
                print('step %d, training accuracy %g' % (step, train_accuracy))

        if test_len % FLAGS.batch_size == 0:
            test_batch = test_len / FLAGS.batch_size
        else:
            test_batch = (test_len / FLAGS.batch_size) + 1
        print('test_batch = ' + str(test_batch))
        test_accuracy = 0.0
        for i in range(int(test_batch)):
            batch = FLAGS.batch_size * i
            batch_plus = FLAGS.batch_size * (i + 1)
            if batch_plus > test_len:
                batch_plus = test_len
            test_accuracy += sess.run(
                acc,
                feed_dict={
                    images_placeholder: test_image[batch:batch_plus],
                    labels_placeholder: test_label[batch:batch_plus],
                    keep_prob: 1.0
                }
            )
            if i != 0:
                test_accuracy /= 2.0
        print('test accuracy %g' % (test_accuracy))
        save_path = saver.save(sess, FLAGS.save_model)
The learning result is saved to /path/to/model.ckpt.
Again, I referred to the Qiita article "TensorFlow: To learn from a large number of images... ~(almost) solution~".
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
import os
import sys
import numpy as np
import tensorflow as tf
import cv2
import tensorflow.python.platform
from types import *
NUM_CLASSES = 3
IMAGE_SIZE = 28
IMAGE_PIXELS = IMAGE_SIZE * IMAGE_SIZE * 3
# Directory containing the faces cropped in 2.
DIR_PATH = '/path/to/cutout/'
flags = tf.app.flags
FLAGS = flags.FLAGS
flags.DEFINE_string('readmodels', '/path/to/model.ckpt', 'File name of model data')
def inference(images_placeholder, keep_prob):
    def weight_variable(shape):
        initial = tf.truncated_normal(shape, stddev=0.1)
        return tf.Variable(initial)

    def bias_variable(shape):
        initial = tf.constant(0.1, shape=shape)
        return tf.Variable(initial)

    def conv2d(x, W):
        return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

    def max_pool_2x2(x):
        return tf.nn.max_pool(
            x,
            ksize=[1, 2, 2, 1],
            strides=[1, 2, 2, 1],
            padding='SAME'
        )

    x_image = tf.reshape(images_placeholder, [-1, IMAGE_SIZE, IMAGE_SIZE, 3])
    with tf.name_scope('conv1') as scope:
        W_conv1 = weight_variable([5, 5, 3, 32])
        b_conv1 = bias_variable([32])
        h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
    with tf.name_scope('pool1') as scope:
        h_pool1 = max_pool_2x2(h_conv1)
    with tf.name_scope('conv2') as scope:
        W_conv2 = weight_variable([5, 5, 32, 64])
        b_conv2 = bias_variable([64])
        h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
    with tf.name_scope('pool2') as scope:
        h_pool2 = max_pool_2x2(h_conv2)
    with tf.name_scope('fc1') as scope:
        W_fc1 = weight_variable([7 * 7 * 64, 1024])
        b_fc1 = bias_variable([1024])
        h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])
        h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
        h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
    with tf.name_scope('fc2') as scope:
        W_fc2 = weight_variable([1024, NUM_CLASSES])
        b_fc2 = bias_variable([NUM_CLASSES])
    with tf.name_scope('softmax') as scope:
        y_conv = tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)
    return y_conv
if __name__ == '__main__':
    test_image = []
    test_image_name = []
    files = os.listdir(DIR_PATH)
    for file in files:
        if file == '.DS_Store':
            continue
        img = cv2.imread(DIR_PATH + file)
        img = cv2.resize(img, (IMAGE_SIZE, IMAGE_SIZE))
        test_image.append(img.flatten().astype(np.float32) / 255.0)
        test_image_name.append(file)
    test_image = np.asarray(test_image)

    images_placeholder = tf.placeholder('float', shape=(None, IMAGE_PIXELS))
    labels_placeholder = tf.placeholder('float', shape=(None, NUM_CLASSES))
    keep_prob = tf.placeholder('float')

    logits = inference(images_placeholder, keep_prob)
    sess = tf.InteractiveSession()

    saver = tf.train.Saver()
    sess.run(tf.initialize_all_variables())
    saver.restore(sess, FLAGS.readmodels)

    for i in range(len(test_image)):
        pr = logits.eval(feed_dict={
            images_placeholder: [test_image[i]],
            keep_prob: 1.0
        })[0]
        pred = np.argmax(pr)
        if pred == 1:
            # When judged to be the favorite celebrity
            print('%s,%f' % (test_image_name[i], pr[pred] * 100.0))
The results are written to standard output, so redirect them to a file as appropriate. Sort them in descending order of score, and you're done!
$ cat result.csv | sort -r -t, -k 2 | head -5
1xxxx1_1.jpg,0.5406388165003011
1xxxx1_2.jpg,0.5350551152698707
1xxxx6_1.jpg,0.5310078821076752
1xxxx2_1.jpg,0.5183026050695199
1xxxx0_1.jpg,0.5130400958800978
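The same top-5 extraction can also be done in Python instead of the shell. A small sketch, assuming result.csv holds "file name,score" lines as above:

# Sort result.csv by the score column in descending order and show the top 5
with open('result.csv') as f:
    rows = [line.strip().split(',') for line in f if line.strip()]
rows.sort(key=lambda r: float(r[1]), reverse=True)
for name, score in rows[:5]:
    print('%s,%s' % (name, score))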
The results are a bit iffy, but I presented this as an example of what our subcommittee has been working on.