introduction

Salut, c'est ikki. Enfin, j'ai résolu le problème d'alimentation d'un grand nombre d'images à TensorFlow. Quand je l'ai résolu, c'était vraiment embarrassant. .. .. Ne copiez pas et ne collez pas à mi-chemin!

Problèmes précédents

Ici a publié un programme pour apprendre des images préparées par moi-même, mais il n'est pas possible d'apprendre des dizaines de milliers de données d'image telles quelles. C'était.

Donc, tout d'abord, je publierai le programme modifié.

`train.py`


#!/usr/bin/env python
# -*- coding: utf-8 -*-
import sys
import cv2
import numpy as np
import tensorflow as tf
import tensorflow.python.platform

NUM_CLASSES = 3
IMAGE_SIZE = 28
IMAGE_PIXELS = IMAGE_SIZE*IMAGE_SIZE*3

flags = tf.app.flags
FLAGS = flags.FLAGS
flags.DEFINE_string('save_model', 'models/model.ckpt', 'File name of model data')
flags.DEFINE_string('train', 'training/train.txt', 'File name of train data')
flags.DEFINE_string('test', 'training/test.txt', 'File name of test data')
flags.DEFINE_string('train_dir', '/tmp/pict_data', 'Directory to put the training data.')
flags.DEFINE_integer('max_steps', 201, 'Number of steps to run trainer.')
flags.DEFINE_integer('batch_size', 256, 'Batch size'
                     'Must divide evenly into the dataset sizes.')
flags.DEFINE_float('learning_rate', 1e-4, 'Initial learning rate.')

def inference(images_placeholder, keep_prob):
    ###############################################################
    #Fonctions qui créent des modèles d'apprentissage en profondeur
    #argument: 
    #  images_placeholder: inputs()Espace réservé de l'image créée dans
    #  keep_prob:lieu de taux d'abandon_holder
    #Valeur de retour:
    #  cross_entropy:Résultat du calcul du modèle
    ###############################################################
    #Écart type de poids 0.Initialisé avec une distribution normale de 1
    def weight_variable(shape):
      initial = tf.truncated_normal(shape, stddev=0.1)
      return tf.Variable(initial)
    #Biais par rapport à l'écart type 0.Initialisé avec une distribution normale de 1
    def bias_variable(shape):
      initial = tf.constant(0.1, shape=shape)
      return tf.Variable(initial)
    #Créer une couche convolutive
    def conv2d(x, W):
      return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')
    #Créer une couche de pooling
    def max_pool_2x2(x):
      return tf.nn.max_pool(x, ksize=[1, 2, 2, 1],
                            strides=[1, 2, 2, 1], padding='SAME')
    #Transformez l'entrée en 28x28x3
    x_images = tf.reshape(images_placeholder, [-1, IMAGE_SIZE, IMAGE_SIZE, 3])
    #Création de la couche de convolution 1
    with tf.name_scope('conv1') as scope:
        W_conv1 = weight_variable([5, 5, 3, 32])
        b_conv1 = bias_variable([32])
        h_conv1 = tf.nn.relu(conv2d(x_images, W_conv1) + b_conv1)
    #Création de la couche de pooling 1
    with tf.name_scope('pool1') as scope:
        h_pool1 = max_pool_2x2(h_conv1)
    #Création de la couche de convolution 2
    with tf.name_scope('conv2') as scope:
        W_conv2 = weight_variable([5, 5, 32, 64])
        b_conv2 = bias_variable([64])
        h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
    #Création du pooling layer 2
    with tf.name_scope('pool2') as scope:
        h_pool2 = max_pool_2x2(h_conv2)
    #Création de la couche 1 entièrement connectée
    with tf.name_scope('fc1') as scope:
        W_fc1 = weight_variable([7*7*64, 1024])
        b_fc1 = bias_variable([1024])
        h_pool2_flat = tf.reshape(h_pool2, [-1, 7*7*64])
        h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
        #paramètres d'abandon
        h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
    #Création de la couche 2 entièrement connectée
    with tf.name_scope('fc2') as scope:
        W_fc2 = weight_variable([1024, NUM_CLASSES])
        b_fc2 = bias_variable([NUM_CLASSES])
    #Normalisation par fonction softmax
    with tf.name_scope('softmax') as scope:
        y_conv=tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)
    return y_conv

def loss(logits, labels):
    ###############################################################
    #Fonction pour calculer la perte
    #argument: 
    #  logits:Tenseur logit, float - [batch_size, NUM_CLASSES]
    #  labels:Tenseur d'étiquette, int32 - [batch_size, NUM_CLASSES]
    #Valeur de retour:
    #  cross_entropy:Résultat du calcul du modèle
    ###############################################################
    cross_entropy = -tf.reduce_sum(labels*tf.log(tf.clip_by_value(logits,1e-10,1.0)))
    tf.scalar_summary("cross_entropy", cross_entropy)
    return cross_entropy

def training(loss, learning_rate):
    ###############################################################
    #Fonctions qui définissent les opérations d'entraînement
    #argument: 
    #  loss:Tenseur de perte, loss()Résultat de
    #  learning_rate:Coefficient d'apprentissage
    #Valeur de retour:
    #  train_step:Op de formation
    ###############################################################
    train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)
    return train_step

def accuracy(logits, labels):
    ###############################################################
    #Taux de réponse correct(accuracy)Fonction à calculer
    #argument: 
    #  logits: inference()Résultat de
    #  labels:Tenseur d'étiquette, int32 - [batch_size, NUM_CLASSES]
    #Valeur de retour:
    #  accuracy:Taux de réponse correct(float)
    ###############################################################
    correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(labels, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
    tf.scalar_summary("accuracy", accuracy)
    return accuracy

if __name__ == '__main__':
    #Fichier ouvert
    with open(FLAGS.train, 'r') as f: # train.txt
        train_image = []
        train_label = []
        for line in f:
            line = line.rstrip()
            l = line.split()
            img = cv2.imread(l[0])
            img = cv2.resize(img, (IMAGE_SIZE, IMAGE_SIZE))
            train_image.append(img.flatten().astype(np.float32)/255.0)
            tmp = np.zeros(NUM_CLASSES)
            tmp[int(l[1])] = 1
            train_label.append(tmp)
        train_image = np.asarray(train_image)
        train_label = np.asarray(train_label)
        train_len = len(train_image)

    with open(FLAGS.test, 'r') as f: # test.txt
        test_image = []
        test_label = []
        for line in f:
            line = line.rstrip()
            l = line.split()
            img = cv2.imread(l[0])
            img = cv2.resize(img, (IMAGE_SIZE, IMAGE_SIZE))
            test_image.append(img.flatten().astype(np.float32)/255.0)
            tmp = np.zeros(NUM_CLASSES)
            tmp[int(l[1])] = 1
            test_label.append(tmp)
        test_image = np.asarray(test_image)
        test_label = np.asarray(test_label)
        test_len = len(test_image)
    
    with tf.Graph().as_default():
        images_placeholder = tf.placeholder("float", shape=(None, IMAGE_PIXELS))
        labels_placeholder = tf.placeholder("float", shape=(None, NUM_CLASSES))
        keep_prob = tf.placeholder("float")

        logits = inference(images_placeholder, keep_prob)
        loss_value = loss(logits, labels_placeholder)
        train_op = training(loss_value, FLAGS.learning_rate)
        acc = accuracy(logits, labels_placeholder)

        saver = tf.train.Saver()
        sess = tf.Session()
        sess.run(tf.initialize_all_variables())
        summary_op = tf.merge_all_summaries()
        summary_writer = tf.train.SummaryWriter(FLAGS.train_dir, sess.graph_def)

        #Exécution de la formation
        if train_len % FLAGS.batch_size is 0:
            train_batch = train_len/FLAGS.batch_size
        else:
            train_batch = (train_len/FLAGS.batch_size)+1
            print "train_batch = "+str(train_batch)
        for step in range(FLAGS.max_steps):
            for i in range(train_batch):
                batch = FLAGS.batch_size*i
                batch_plus = FLAGS.batch_size*(i+1)
                if batch_plus > train_len: batch_plus = train_len
                # feed_Spécifiez les données à mettre dans l'espace réservé avec dict
                sess.run(train_op, feed_dict={
                  images_placeholder: train_image[batch:batch_plus],
                  labels_placeholder: train_label[batch:batch_plus],
                  keep_prob: 0.5})
                  
            if step % 10 == 0:
                train_accuracy = 0.0
                for i in range(train_batch):
                    batch = FLAGS.batch_size*i
                    batch_plus = FLAGS.batch_size*(i+1)
                    if batch_plus > train_len: batch_plus = train_len
                    train_accuracy += sess.run(acc, feed_dict={
                        images_placeholder: train_image[batch:batch_plus],
                        labels_placeholder: train_label[batch:batch_plus],
                        keep_prob: 1.0})
                    if i is not 0: train_accuracy /= 2.0
                #Ajoutez la valeur à afficher sur le TensorBoard après chaque 10 étapes
                #summary_str = sess.run(summary_op, feed_dict={
                #    images_placeholder: train_image,
                #    labels_placeholder: train_label,
                #    keep_prob: 1.0})
                #summary_writer.add_summary(summary_str, step)
                print "step %d, training accuracy %g"%(step, train_accuracy)

    if test_len % FLAGS.batch_size is 0:
        test_batch = test_len/FLAGS.batch_size
    else:
        test_batch = (test_len/FLAGS.batch_size)+1
        print "test_batch = "+str(test_batch)
    test_accuracy = 0.0
    for i in range(test_batch):
        batch = FLAGS.batch_size*i
        batch_plus = FLAGS.batch_size*(i+1)
        if batch_plus > train_len: batch_plus = train_len
        test_accuracy += sess.run(acc, feed_dict={
                images_placeholder: test_image[batch:batch_plus],
                labels_placeholder: test_label[batch:batch_plus],
                keep_prob: 1.0})
        if i is not 0: test_accuracy /= 2.0
    print "test accuracy %g"%(test_accuracy)
    save_path = saver.save(sess, FLAGS.save_model)

Pour le moment, je pense que cela fonctionnera avec le copier-coller. (Copions et collons après avoir compris!) Il y a des corrections mineures, mais les principaux changements qui ont conduit à la solution sont train_accuracy et test_accuracy, qui calculent la précision. Je suis passé au traitement par lots. Eh bien, pourquoi avez-vous effectué un traitement par lots là où vous avez appris et ne l'avez pas fait lorsque vous avez calculé la précision ... ~~ Peut-être que des gens formidables riront lorsqu'ils verront le programme, en disant: "Pourquoi faites-vous cela? Vous obtiendrez une erreur de mémoire w Pupupu ~ www". En fait, j'ai ri au moment où j'ai remarqué. ~~ Mis à part les négatifs, cela prendra du temps, peu importe le nombre d'images que vous mangez, mais je pense que cela vous apprendra bien.

J'écrirai un peu plus en détail comment gérer ce programme.

Structure des dossiers

Placez train.txt et test.txt dans le même dossier que ce train.py. Le contenu de chaque texte est le suivant

`lang:train.txt&test.txt`


/home/picture/images/image1.jpg 0
/home/picture/images/image2.jpg 0
/home/picture/images/image3.jpg 1
/home/picture/images/image4.jpg 2
/home/picture/images/image5.jpg 1
/home/picture/images/image6.jpg 1
            :
            :

Se sentir comme ça. Cette fois, il est classé en 3 classes, donc ʻNom du fichier image Nom du numéro de classe (0 ~ 2) ça ira. J'écris toujours avec un chemin absolu dans de tels cas. Ce fichier texte est difficile à écrire par vous-même, alors écrivons un programme pour le générer automatiquement. Je le posterai si demandé, mais ... De plus, cette fois, j'ai créé un dossier appelétraining dans le même dossier que train.py` et y ai placé les deux fichiers texte.

De plus, si vous créez un dossier appelé models, un fichier .ckpt des résultats d'apprentissage sera créé.

Comment utiliser les résultats d'apprentissage

C'est juste un exemple.

`test.py`


#!/usr/bin/env python
# -*- coding: utf-8 -*-

import sys
import numpy as np
import tensorflow as tf
import cv2
import tensorflow.python.platform
from types import *

NUM_CLASSES = 3
IMAGE_SIZE = 28
IMAGE_PIXELS = IMAGE_SIZE*IMAGE_SIZE*3

flags = tf.app.flags
FLAGS = flags.FLAGS
flags.DEFINE_string('readmodels', 'models/model.ckpt', 'File name of model data')

def inference(images_placeholder, keep_prob):
    def weight_variable(shape):
      initial = tf.truncated_normal(shape, stddev=0.1)
      return tf.Variable(initial)

    def bias_variable(shape):
      initial = tf.constant(0.1, shape=shape)
      return tf.Variable(initial)

    def conv2d(x, W):
      return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

    def max_pool_2x2(x):
      return tf.nn.max_pool(x, ksize=[1, 2, 2, 1],
                            strides=[1, 2, 2, 1], padding='SAME')
    
    x_image = tf.reshape(images_placeholder, [-1, IMAGE_SIZE, IMAGE_SIZE, 3])

    with tf.name_scope('conv1') as scope:
        W_conv1 = weight_variable([5, 5, 3, 32])
        b_conv1 = bias_variable([32])
        h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)

    with tf.name_scope('pool1') as scope:
        h_pool1 = max_pool_2x2(h_conv1)
    
    with tf.name_scope('conv2') as scope:
        W_conv2 = weight_variable([5, 5, 32, 64])
        b_conv2 = bias_variable([64])
        h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)

    with tf.name_scope('pool2') as scope:
        h_pool2 = max_pool_2x2(h_conv2)

    with tf.name_scope('fc1') as scope:
        W_fc1 = weight_variable([7*7*64, 1024])
        b_fc1 = bias_variable([1024])
        h_pool2_flat = tf.reshape(h_pool2, [-1, 7*7*64])
        h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
        h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

    with tf.name_scope('fc2') as scope:
        W_fc2 = weight_variable([1024, NUM_CLASSES])
        b_fc2 = bias_variable([NUM_CLASSES])

    with tf.name_scope('softmax') as scope:
        y_conv=tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)

    return y_conv

if __name__ == '__main__':
    test_image = []
    for i in range(1, len(sys.argv)):
        img = cv2.imread(sys.argv[i])
        print img
        img = cv2.resize(img, (IMAGE_SIZE, IMAGE_SIZE))
        test_image.append(img.flatten().astype(np.float32)/255.0)
    test_image = np.asarray(test_image)

    images_placeholder = tf.placeholder("float", shape=(None, IMAGE_PIXELS))
    labels_placeholder = tf.placeholder("float", shape=(None, NUM_CLASSES))
    keep_prob = tf.placeholder("float")

    logits = inference(images_placeholder, keep_prob)
    sess = tf.InteractiveSession()

    saver = tf.train.Saver()
    sess.run(tf.initialize_all_variables())
    saver.restore(sess,FLAGS.readmodels)

    for i in range(len(test_image)):
        pr = logits.eval(feed_dict={ 
            images_placeholder: [test_image[i]],
            keep_prob: 1.0 })[0]
        pred = np.argmax(pr)
        print pr
        print pred
    print "finish"

fichier image python test.py Si vous l'exécutez avec, vous devriez obtenir le résultat de la reconnaissance. Surtout l'explication est bonne.

Problèmes qui n'ont pas encore été résolus

En fait, train.py est également compatible avec Tensorboard, mais j'ai du mal car la partie récapitulative qui dessine le graphique ne peut pas être traitée par lots. Le problème est que le type de la valeur renvoyée par sess.run (summary_op, ~~) est de type str, et il y a des valeurs inconnues pour Tensorboard. ** Il n'est pas possible de dire "additionner les résultats calculés pour chaque lot et dessiner un graphique (prendre la moyenne)" **.

Faites-moi savoir si vous avez une solution ou si vous la remarquez. .. ..

TensorFlow Pour apprendre d'un grand nombre d'images ... ~ (presque) solution ~