7.8
Added a process that automatically determines the PC environment with gethostname() and uses the CPU in the iMac environment and the GPU in the Google Colab environment.
import socket
host = socket.gethostname()
#Get the host name
# iMac : xxxxxxxx
# Lenovo : yyyyyyyy
# google colab : random
if host == 'xxxxxxxx':
    gpu_en = 0
elif host == 'yyyyyyyy':
    gpu_en = 0
else:
    gpu_en = 1
if gpu_en == 1:
    from chainer import cuda, Variable

if gpu_en == 1:
    model.to_gpu()

if gpu_en == 1:
    return (Variable(cuda.to_gpu(np.array(mini_batch_data, dtype=np.float32))),
            Variable(cuda.to_gpu(np.array(mini_batch_move, dtype=np.int32))))
elif gpu_en == 0:
    return np.array(mini_batch_data, dtype=np.float32), np.array(mini_batch_move, dtype=np.int32)
Since pickle creation on Google Colab is slow, I created the pickle file on the iMac instead. However, when I then tried to load that pickle file on Colab for training, I got an error saying the protocol is not supported. The cause is as follows. pickle supports protocol 5 only on Python 3.8 and above; the iMac runs Python 3.8.2, while Colab runs Python 3.6.9. On the iMac, pickle.dump(positions_train, f, pickle.HIGHEST_PROTOCOL) therefore creates a protocol 5 pickle file, and when Colab tries to read it for training, it fails with an "unsupported protocol" error. If you remove pickle.HIGHEST_PROTOCOL, the file is created with the default protocol 4, which Colab can load.
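As a minimal sketch of that workaround, the protocol can also be pinned explicitly instead of relying on the default. Here positions_train is the same variable as in the script below, while the file name is only a placeholder for this example:
import pickle

# Protocol 4 is readable on Python 3.4 and later, so a file written on the
# Python 3.8 iMac can still be loaded on the Python 3.6.9 Colab runtime.
# 'kifulist_train.pickle' is a placeholder file name for this sketch.
with open('kifulist_train.pickle', 'wb') as f:
    pickle.dump(positions_train, f, protocol=4)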
train_policy.py
python-dlshogi\train_policy.py
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#Environmental setting
#-----------------------------
import socket
host = socket.gethostname()
#Get the host name
# iMac : xxxxxxxx
# Lenovo : yyyyyyyy
# google colab : random
if host == 'xxxxxxxx':
    gpu_en = 0
elif host == 'yyyyyyyy':
    gpu_en = 0
else:
    gpu_en = 1
#Other notes
#If pickle creation in Google Colab is slow and you want to create the pickle locally and load it in Google Colab,
#remove the pickle protocol option (i.e. use the default protocol).
#This is because Colab's Python version is not 3.8, so the highest protocol (5) is not supported.
#That is, the pickle.dump(positions_train, f, pickle.HIGHEST_PROTOCOL) line.
#-----------------------------
import numpy as np
import chainer
if gpu_en == 1:
    from chainer import cuda, Variable
from chainer import optimizers, serializers
import chainer.functions as F
from pydlshogi.common import *
from pydlshogi.network.policy import PolicyNetwork
from pydlshogi.features import *
from pydlshogi.read_kifu import *
import argparse
import random
import pickle
import os
import re
import logging
parser = argparse.ArgumentParser()
parser.add_argument('kifulist_train', type=str, help='train kifu list')
parser.add_argument('kifulist_test', type=str, help='test kifu list')
parser.add_argument('--batchsize', '-b', type=int, default=32, help='Number of positions in each mini-batch')
parser.add_argument('--test_batchsize', type=int, default=512, help='Number of positions in each test mini-batch')
parser.add_argument('--epoch', '-e', type=int, default=1, help='Number of epoch times')
parser.add_argument('--model', type=str, default='model/model_policy', help='model file name')
parser.add_argument('--state', type=str, default='model/state_policy', help='state file name')
parser.add_argument('--initmodel', '-m', default='', help='Initialize the model from given file')
parser.add_argument('--resume', '-r', default='', help='Resume the optimization from snapshot')
parser.add_argument('--log', default=None, help='log file path')
parser.add_argument('--lr', default=0.01, type=float, help='learning rate')
parser.add_argument('--eval_interval', '-i', default=1000, type=int, help='eval interval')
args = parser.parse_args()
logging.basicConfig(format='%(asctime)s\t%(levelname)s\t%(message)s',
                    datefmt='%Y/%m/%d %H:%M:%S', filename=args.log, level=logging.DEBUG)
model = PolicyNetwork() #Self-made model defined in an external module
if gpu_en == 1:
    model.to_gpu()
optimizer = optimizers.SGD(lr=args.lr) #Instantiate an SGD class
# optimizer = optimizers.MomentumSGD(lr=args.lr, momentum=0.9) #Instantiate the MomentumSGD class
optimizer.setup(model)
# Init/Resume
if args.initmodel:
    logging.info('Load model from {}'.format(args.initmodel))
    serializers.load_npz(args.initmodel, model)
if args.resume:
    logging.info('Load optimizer state from {}'.format(args.resume))
    serializers.load_npz(args.resume, optimizer)
logging.info('read kifu start')
#If there is a saved pickle file, load the pickle file
# train data
#Strip the extension from the training kifu list file name (the list file itself, not its contents) and store the name with a .pickle extension in a variable.
train_pickle_filename = re.sub(r'\..*?$', '', args.kifulist_train) + '.pickle'
if os.path.exists(train_pickle_filename):
    with open(train_pickle_filename, 'rb') as f: # train_pickle_filename holds the read_kifu output: positions = [([piece_bb 15 elements], [occupied 2 elements], [pieces_in_hand 2 elements], [move_label 1 element], [win 1 element]), (same tuple), ...], i.e. (number of positions per game) x (number of games) entries
        positions_train = pickle.load(f)
    logging.info('load train pickle')
else:
    positions_train = read_kifu(args.kifulist_train)
# test data
test_pickle_filename = re.sub(r'\..*?$', '', args.kifulist_test) + '.pickle'
if os.path.exists(test_pickle_filename):
    with open(test_pickle_filename, 'rb') as f:
        positions_test = pickle.load(f)
    logging.info('load test pickle')
else:
    positions_test = read_kifu(args.kifulist_test)
#If there is no saved pickle, dump the contents of the variable filled in by the else branch above into a pickle file and save it.
if not os.path.exists(train_pickle_filename):
    with open(train_pickle_filename, 'wb') as f: #Open an empty pickle file
        pickle.dump(positions_train, f, pickle.HIGHEST_PROTOCOL) #Put the data into the opened pickle file
    logging.info('save train pickle')
if not os.path.exists(test_pickle_filename):
    with open(test_pickle_filename, 'wb') as f:
        pickle.dump(positions_test, f, pickle.HIGHEST_PROTOCOL)
    logging.info('save test pickle')
logging.info('read kifu end')
logging.info('train position num = {}'.format(len(positions_train))) #Outputs the number of outermost elements of positions = the number of board positions
logging.info('test position num = {}'.format(len(positions_test)))
# mini batch
def mini_batch(positions, i, batchsize):
    mini_batch_data = []
    mini_batch_move = []
    for b in range(batchsize):
        features, move, win = make_features(positions[i + b]) #Loop over the outermost elements of positions, i.e. the board positions
        mini_batch_data.append(features) #Append the features for each position = policy network input data
        mini_batch_move.append(move) #Append the move for each position = policy network teacher data
    if gpu_en == 1:
        return (Variable(cuda.to_gpu(np.array(mini_batch_data, dtype=np.float32))),
                Variable(cuda.to_gpu(np.array(mini_batch_move, dtype=np.int32))))
    elif gpu_en == 0:
        return np.array(mini_batch_data, dtype=np.float32), np.array(mini_batch_move, dtype=np.int32)
def mini_batch_for_test(positions, batchsize):
    mini_batch_data = []
    mini_batch_move = []
    for b in range(batchsize):
        features, move, win = make_features(random.choice(positions)) #Randomly choose a position from positions
        mini_batch_data.append(features)
        mini_batch_move.append(move)
    if gpu_en == 1:
        return (Variable(cuda.to_gpu(np.array(mini_batch_data, dtype=np.float32))),
                Variable(cuda.to_gpu(np.array(mini_batch_move, dtype=np.int32))))
    elif gpu_en == 0:
        return np.array(mini_batch_data, dtype=np.float32), np.array(mini_batch_move, dtype=np.int32)
#↑ Preparation for learning
#↓ Learning loop
logging.info('start training')
itr = 0
sum_loss = 0
for e in range(args.epoch):
    positions_train_shuffled = random.sample(positions_train, len(positions_train))
    # positions_train is [([piece_bb 15 elements], [occupied 2 elements], [pieces_in_hand 2 elements], [move_label 1 element], [win 1 element]), (same tuple), ...], i.e. (number of positions per game) x (number of games) entries
    # random.sample(a, b) randomly returns b elements from a
    itr_epoch = 0
    sum_loss_epoch = 0
    for i in range(0, len(positions_train_shuffled) - args.batchsize, args.batchsize):
        #Forward propagation
        x, t = mini_batch(positions_train_shuffled, i, args.batchsize) #x: board positions = input, t: moves = teacher data
        y = model(x) #model is a self-made model defined in an external module
        model.cleargrads() #Gradient initialization
        #Loss calculation
        loss = F.softmax_cross_entropy(y, t)
        #Backpropagation
        loss.backward() #Calculate the gradients
        #Update the parameters using the gradients
        optimizer.update()
        itr += 1
        sum_loss += loss.data
        itr_epoch += 1
        sum_loss_epoch += loss.data
        #Evaluate at regular intervals (run every eval_interval)
        # print train loss and test accuracy
        if optimizer.t % args.eval_interval == 0: # a % b returns the remainder of a divided by b; t is the update step, incremented by update()
            x, t = mini_batch_for_test(positions_test, args.test_batchsize) # x = board positions, t = moves
            y = model(x)
            logging.info('epoch = {}, iteration = {}, loss = {}, accuracy = {}'.format(
                optimizer.epoch + 1, optimizer.t, sum_loss / itr, F.accuracy(y, t).data))
            itr = 0
            sum_loss = 0
    # validate test data
    logging.info('validate test data')
    itr_test = 0
    sum_test_accuracy = 0
    for i in range(0, len(positions_test) - args.batchsize, args.batchsize): # positions_test holds the result of read_kifu on kifulist_test
        x, t = mini_batch(positions_test, i, args.batchsize) #Exactly the same as during training
        y = model(x)
        itr_test += 1
        sum_test_accuracy += F.accuracy(y, t).data
    logging.info('epoch = {}, iteration = {}, train loss avr = {}, test accuracy = {}'.format(
        optimizer.epoch + 1, optimizer.t, sum_loss_epoch / itr_epoch, sum_test_accuracy / itr_test))
    #Notify the optimizer that one epoch has finished so it can process the next epoch
    optimizer.new_epoch()
#Save model and optimizer state after learning all epochs
logging.info('save the model')
serializers.save_npz(args.model, model)
print('save the optimizer')
serializers.save_npz(args.state, optimizer)
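For reference, a typical way to run the script might look like the following; the kifu list file names are placeholders, and the options are the ones defined by the argparse block above:
python train_policy.py kifulist_train.txt kifulist_test.txt --batchsize 32 --epoch 1 --lr 0.01 --log train.log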