Learning model creation, training, and inference

Introduction

- A few years ago I worked through the TensorFlow MNIST CNN tutorial and then never touched it again. This is a common story.
- This time, I modified the tutorial to create a learning model for face images.
- As with MNIST, there are 10 classes and the image size is 28x28. The accuracy is around 80%. It is only a hobby project, so I am satisfied with that.
- The complete source is here.

model

- For the model itself, I referred to the following.
- Reference: TensorFlow mnist_deep.py
- `num_classes`, `img_rows`, and `img_cols` take their values from the configuration file. This adds support for changing the number of classes and the image size.

def model():
    """Build the MNIST-reference CNN graph, sized from the configuration.

    The number of output classes is ``len(CLASSES)`` and the input image size
    is ``IMG_ROWS`` x ``IMG_COLS``.

    Returns:
        A tuple ``(x, y_conv, keep_prob)``:
        ``x`` is the float32 input placeholder of flattened images
        (``[None, IMG_ROWS * IMG_COLS]``), ``y_conv`` is the output of the
        final fully connected layer (``[None, len(CLASSES)]``, no softmax
        applied), and ``keep_prob`` is the dropout keep-probability
        placeholder.
    """

    num_classes = len(CLASSES)
    img_rows, img_cols = IMG_ROWS, IMG_COLS

    x = tf.compat.v1.placeholder(tf.float32, [None, img_rows*img_cols])

    # Restore the 2-D, single-channel layout expected by the conv layers.
    with tf.name_scope('reshape'):
        x_image = tf.reshape(x, [-1, img_rows, img_cols, 1])

    # First convolution: 5x5 kernels, 1 input channel -> 32 feature maps.
    with tf.name_scope('conv1'):
        W_conv1 = weight_variable([5, 5, 1, 32])
        b_conv1 = bias_variable([32])
        h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)

    with tf.name_scope('pool1'):
        h_pool1 = max_pool_2x2(h_conv1)

    # Second convolution: 5x5 kernels, 32 -> 64 feature maps.
    with tf.name_scope('conv2'):
        W_conv2 = weight_variable([5, 5, 32, 64])
        b_conv2 = bias_variable([64])
        h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)

    with tf.name_scope('pool2'):
        h_pool2 = max_pool_2x2(h_conv2)

    with tf.name_scope('fc1'):
        # Derive the flattened size from the pooled tensor's static shape so
        # the layer follows whatever image size the configuration specifies.
        flat_dim = int(h_pool2.shape[1]) * int(h_pool2.shape[2]) * 64

        W_fc1 = weight_variable([flat_dim, 1024])
        b_fc1 = bias_variable([1024])

        h_pool2_flat = tf.reshape(h_pool2, [-1, flat_dim])
        h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

    with tf.name_scope('dropout'):
        keep_prob = tf.compat.v1.placeholder(tf.float32)
        # tf.nn.dropout takes the drop rate, i.e. 1 - keep probability.
        h_fc1_drop = tf.nn.dropout(h_fc1, rate=1-keep_prob)

    with tf.name_scope('fc2'):
        W_fc2 = weight_variable([1024, num_classes])
        b_fc2 = bias_variable([num_classes])

        y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2

    # Fixed: the original returned the undefined name `keep_pro` (NameError).
    return x, y_conv, keep_prob

- The following helper functions are also taken from the tutorial.

def conv2d(x, W):
    """Convolve x with filter W using stride 1 in every dimension and 'SAME' padding."""
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')


def max_pool_2x2(x):
    """Max-pool x over 2x2 windows with stride 2 and 'SAME' padding."""
    # The window and the stride are the same 2x2 spatial step.
    window = [1, 2, 2, 1]
    return tf.nn.max_pool2d(x, ksize=window, strides=window, padding='SAME')


def weight_variable(shape):
    """Create a variable of the given shape, initialized from a truncated normal (stddev 0.1)."""
    return tf.Variable(tf.random.truncated_normal(shape, stddev=0.1))


def bias_variable(shape):
    """Create a variable of the given shape with every element initialized to 0.1."""
    return tf.Variable(tf.constant(0.1, shape=shape))

Learning

Learning model

- The training function reads the previously created dataset.
- The batch size and the number of epochs can also be changed.

def train(datasets, batch_size=128, epochs=12):
    """Train the model on ``datasets``.

    Args:
        datasets: Object exposing ``train`` and ``test`` splits. The training
            loop reads ``images`` and ``labels`` from both splits and uses
            ``train.next_batch(batch_size)`` and ``train.epochs_completed``.
        batch_size: Number of samples requested per training step.
        epochs: Training runs until ``datasets.train.epochs_completed``
            reaches this value.
    """

    x, y_conv, keep_prob = model()

    # The label width must match the model output, which is sized from
    # len(CLASSES); a hard-coded 10 breaks any other class count.
    y_ = tf.compat.v1.placeholder(tf.float32, [None, len(CLASSES)])

    with tf.name_scope('loss'):
        cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(labels=y_, logits=y_conv)
    # Average the per-sample loss over the batch.
    cross_entropy = tf.reduce_mean(cross_entropy)

    with tf.name_scope('adam_optimizer'):
        train_step = tf.compat.v1.train.AdamOptimizer(1e-4).minimize(cross_entropy)

    with tf.name_scope('accuracy'):
        # A sample is correct when the highest-scoring class equals the
        # highest-valued label entry (presumably one-hot labels).
        correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
        correct_prediction = tf.cast(correct_prediction, tf.float32)
    accuracy = tf.reduce_mean(correct_prediction)

- This is almost the same as the tutorial, but saving of the model has been added.
- The save location is specified in the configuration file.

    # Saver used later in this function to write the model to MODEL_FILE.
    saver = tf.compat.v1.train.Saver()
    # Create the directory that contains MODEL_FILE if it does not exist yet.
    os.makedirs(os.path.dirname(os.path.abspath(MODEL_FILE)), exist_ok=True)

- The tutorial's training loop was modified so that the accuracy is displayed and the model is saved after every epoch.

    with tf.compat.v1.Session() as sess:
        sess.run(tf.compat.v1.global_variables_initializer())

        # Epoch number at which the next accuracy report and save happen.
        next_epoch = 1
        print('epoch, train accuracy, test accuracy')
        while datasets.train.epochs_completed < epochs:
            # One optimization step on the next mini-batch, with dropout active.
            train_images, train_labels = datasets.train.next_batch(batch_size)
            sess.run(train_step, feed_dict={x: train_images, y_: train_labels, keep_prob: 0.5})

            # True only on the step where the epoch counter reaches next_epoch,
            # so reporting and saving happen once per epoch.
            if datasets.train.epochs_completed == next_epoch:

                # Evaluate on the full train and test sets with dropout disabled.
                train_accuracy = accuracy.eval(feed_dict={x: datasets.train.images, y_: datasets.train.labels, keep_prob: 1.0})
                test_accuracy = accuracy.eval(feed_dict={x: datasets.test.images, y_: datasets.test.labels, keep_prob: 1.0})
                print('{:d}, {:.4f}, {:.4f}'.format(datasets.train.epochs_completed, train_accuracy, test_accuracy))

                # Save to the same MODEL_FILE path after every epoch.
                saver.save(sess, MODEL_FILE)

                next_epoch = datasets.train.epochs_completed + 1

Execution of learning

- Training is run by specifying the `--train` option.
- The batch size is 128 and the number of epochs is 120.

$ python face_deep.py --train
epoch, train accuracy, test accuracy
1, 0.4580, 0.4090
2, 0.5593, 0.4880
abridgement
119, 1.0000, 0.8110
120, 1.0000, 0.792

image.png

inference

inference

- The images must be passed as a numpy array.
- The type of the result can be changed with `dtype`.

def predict(images, dtype=None):
    """Run inference on ``images`` with the model restored from MODEL_FILE.

    Args:
        images: numpy array fed to the model's input placeholder (one
            flattened image per row).
        dtype: Selects the result format.
            ``None`` (default): numpy ``uint8`` array of per-class
            percentages, truncated toward zero.
            ``'int'``: the same percentages as nested lists of Python ints.
            ``'argmax'``: list with the predicted class index for each image.

    Returns:
        The inference result in the format selected by ``dtype``.
    """

    # Start from an empty graph so repeated calls do not accumulate variables.
    tf.compat.v1.reset_default_graph()

    x, y_conv, keep_prob = model()

    with tf.compat.v1.Session() as sess:
        sess.run(tf.compat.v1.global_variables_initializer())

        saver = tf.compat.v1.train.Saver()
        saver.restore(sess, MODEL_FILE)

        # keep_prob 1.0 disables dropout for inference.
        probabilities = sess.run(tf.nn.softmax(y_conv), feed_dict={x: images, keep_prob: 1.0})

    if dtype == 'argmax':
        # Use the raw probabilities: after truncation to whole percentages,
        # near-ties (e.g. 33.2% vs 33.9%) collapse to the same value and
        # argmax could then pick the wrong class.
        return [np.argmax(row) for row in probabilities]

    percentages = np.array(probabilities * 100, dtype=np.uint8)
    if dtype == 'int':
        return [[int(value) for value in row] for row in percentages]

    return percentages

Executing inference

- Inference is run when no option is given.
- This is only an operation check. I plan to call it from a separate web application.
- The following is the inference result for the first 10 entries of the dataset's test images. The first few hundred are labeled 0, so they seem to match.

$ python face_deep.py
abridgement
[[100   0   0   0   0   0   0   0   0   0]
 [ 99   0   0   0   0   0   0   0   0   0]
 [ 99   0   0   0   0   0   0   0   0   0]
 [  0  99   0   0   0   0   0   0   0   0]
 [ 99   0   0   0   0   0   0   0   0   0]
 [ 97   0   0   0   0   0   0   0   0   1]
 [ 99   0   0   0   0   0   0   0   0   0]
 [  0  99   0   0   0   0   0   0   0   0]
 [ 99   0   0   0   0   0   0   0   0   0]
 [ 36  63   0   0   0   0   0   0   0   0]]

Conclusion

- I modified the TensorFlow MNIST CNN tutorial to train on and run inference for face images.
- Since this is only for study, it was enough to be able to run training and inference.
- Next time, I will try running inference from a Flask web application.

Recommended Posts

Learning model creation, learning and reasoning
Model Complexity and Robustness
Machine learning model considering maintainability
Ensemble learning and basket analysis
Supervised Learning 3 Hyperparameters and Tuning (2)
Machine Learning Super Introduction Probability Model and Maximum Likelihood Estimate
Kaggle House Prices ② ~ Model Creation ~
Machine learning and mathematical optimization
Supervised learning 2 Hyperparameters and tuning (1)
Deep Learning Model Lightening Library Distiller
[Note] Django project creation and terminology
Significance of machine learning and mini-batch learning
Classification and regression in machine learning
Inversely analyze a machine learning model
Organize machine learning and deep learning platforms
Deep learning image recognition 2 model implementation
Creating a learning model using MNIST
Validate the learning model with Pylearn2
[Machine learning] Summary and execution of model evaluation / indicators (w / Titanic dataset)
A collection of tips for speeding up learning and reasoning with PyTorch
I tried to make Kana's handwriting recognition Part 2/3 Data creation and learning
Let's make Godzilla's image recognition model preprocessing, learning and deployment feel good