Try regression with TensorFlow

Since TensorFlow came out, I tried to study other than MNIST. Regression is performed to create a correlation. (Addition) Since the output of the intermediate layer was too large for the number of data, the value of the intermediate layer was corrected.

Overview

--The data used is diabetes --Weight and bias are set fairly appropriately.

Source code

import sklearn
import tensorflow as tf
from sklearn import datasets
import numpy as np

diabetes = datasets.load_diabetes()

#Load data
print "load diabetes data"
data = diabetes["data"].astype(np.float32)
target = diabetes['target'].astype(np.float32).reshape(len(diabetes['target']), 1)
#Divided into training data and test data
N=342
x_train, x_test = np.vsplit(data, [N])
y_train, y_test = np.vsplit(target, [N])
N_test = y_test.size

x= tf.placeholder("float",shape=[None,10])

#1st layer Input 10 Output 256
with tf.name_scope('l1') as scope:
    weightl1 = tf.Variable(tf.truncated_normal([10, 256], stddev=0.1),name="weightl1")
    biasel1 = tf.Variable(tf.constant(1.0, shape=[256]), name="biasel1")
    outputl1=tf.nn.relu(tf.matmul(x,weightl1) + biasel1)

#2nd layer Input 256 Output 1
with tf.name_scope('l2') as scope:
    weightl2 = tf.Variable(tf.truncated_normal([256, 1], stddev=0.1),name="weightl2")
    biasel2 = tf.Variable(tf.constant(1.0, shape=[1]), name="biasel2")
    outputl2=tf.nn.relu(tf.matmul(outputl1,weightl2) + biasel2)


    
"""
Function for error calculation
Calculate error with MSE
"""
def loss(output):
    with tf.name_scope('loss') as scope:
        loss = tf.reduce_mean(tf.square(output - y_train))
    return loss


loss_op = loss(outputl2)
optimizer = tf.train.AdagradOptimizer(0.04)
train_step = optimizer.minimize(loss_op)

#Error recording
best = float("inf")

#Initialization
init_op = tf.initialize_all_variables()

with tf.Session() as sess:
    #init
    sess.run(init_op)
    for i in range(20001):
        loss_train = sess.run(loss_op, feed_dict={x:x_train})
        sess.run(train_step, feed_dict={x:x_train})
        if loss_train < best:
            best = loss_train
            best_match = sess.run(outputl2, feed_dict={x:x_test})
        if i %1000 == 0:
            print "step {}".format(i)
            pearson = np.corrcoef(best_match.flatten(), y_test.flatten())
            print 'train loss = {} ,test corrcoef={}'.format(best,pearson[0][1])

result

load diabetes data
step 0
train loss = 29000.1777344 ,test corrcoef=0.169487254139
step 1000
train loss = 3080.2097168 ,test corrcoef=0.717823972634
step 2000
train loss = 2969.1887207 ,test corrcoef=0.72972180486
step 3000
train loss = 2938.4609375 ,test corrcoef=0.73486349373
step 4000
train loss = 2915.63330078 ,test corrcoef=0.737497869454
step 5000
train loss = 2896.14331055 ,test corrcoef=0.739029181368
step 6000
train loss = 2875.51708984 ,test corrcoef=0.74006814362
step 7000
train loss = 2856.36816406 ,test corrcoef=0.741115477047
step 8000
train loss = 2838.77026367 ,test corrcoef=0.742113966068
step 9000
train loss = 2822.453125 ,test corrcoef=0.743066699589
step 10000
train loss = 2807.88916016 ,test corrcoef=0.743988699821
step 11000
train loss = 2795.09057617 ,test corrcoef=0.744917437376
step 12000
train loss = 2783.8828125 ,test corrcoef=0.745871358086
step 13000
train loss = 2773.68457031 ,test corrcoef=0.747112534114
step 14000
train loss = 2764.80224609 ,test corrcoef=0.748115829411
step 15000
train loss = 2756.6628418 ,test corrcoef=0.748800330555
step 16000
train loss = 2749.1340332 ,test corrcoef=0.749471871992
step 17000
train loss = 2741.78881836 ,test corrcoef=0.750184567587
step 18000
train loss = 2734.56054688 ,test corrcoef=0.750722087518
step 19000
train loss = 2727.18579102 ,test corrcoef=0.751146409281
step 20000
train loss = 2719.29101562 ,test corrcoef=0.751330770654

problem

--For some reason, sometimes corrcoef = nan (under investigation). --outputl2 is all 0. -> Maybe there is a gradient explosion problem.

It seems that you can set it quite freely. It seems that it will take some time to master it.