policy_player.py

y = self.model(x)
y is the output-layer value before it passes through the activation function.

logits = y.data[0]
Assign that pre-activation output to a variable named logits. "Logit" means exactly this: the value of the output layer before the activation function is applied.

Meaning of [0]
An example of y.data:
[[-4.137782 0.12063725 -4.907426 ... -5.663455 -6.104148 -7.8398824 ]]
y.data[0]:
[-4.137782 0.12063725 -4.907426 ... -5.663455 -6.104148 -7.8398824 ]

When x is generated, features is wrapped in [] before being turned into an np.array:
x = Variable(cuda.to_gpu(np.array([features], dtype=np.float32)))
That is why y.data has the nested [[ ]] form: the outer [] is the batch dimension (a batch containing just this one position), and y.data[0] picks out the result for that single position.

The number of elements in y.data[0] is (20 + 7) * 9 * 9 = 2187: 20 movement directions (UP, DOWN, ...) plus 7 kinds of pieces that can be dropped from hand, times the 9x9 destination squares. This counts every move, legal and illegal alike.
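To make the [0] concrete, here is a minimal shape check with a random dummy array standing in for the network output (the values are meaningless; only the shapes matter):

import numpy as np

# Dummy output for a batch containing one position: shape (1, 2187)
y_data = np.random.randn(1, (20 + 7) * 9 * 9).astype(np.float32)
print(y_data.shape)   # (1, 2187) -- the outer axis is the batch dimension added by np.array([features])
logits = y_data[0]    # drop the batch axis
print(logits.shape)   # (2187,)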
In the value network that appears in Chapter 10, x is generated without wrapping features in []:
x = Variable(cuda.to_gpu(np.array(features, dtype=np.float32)))
In Chapter 10 the legal moves are filtered first, so the situation is a little different; comparing the two naively just causes confusion.
probabilities = F.softmax(y).data[0]
The resulting probabilities look like this:
[1.3974859e-04 9.8799672e-03 6.4728469e-05 ... 3.0391777e-05 1.9559853e-05 3.4478303e-06]
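For reference, softmax just turns the logits into probabilities that sum to 1; a minimal sketch with toy numbers (not the real shogi output):

import numpy as np

logits = np.array([-4.1, 0.12, -4.9], dtype=np.float32)  # toy logits
e = np.exp(logits - logits.max())   # subtract the max for numerical stability
probabilities = e / e.sum()
print(probabilities)        # approx. [0.014 0.979 0.006]
print(probabilities.sum())  # 1.0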
I modified the script so that it runs on both the iMac and Google Colab.
#Environmental setting
#-----------------------------
import socket
host = socket.gethostname()
#Get the host name
# google colab :random
# iMac : xxxxxxxx
# Lenovo : yyyyyyyy
# env
# 0: google colab
# 1: iMac (no GPU)
# 2: Lenovo (no GPU)
# gpu_en
# 0: disable
# 1: enable
if host == 'xxxxxxxx':
    env = 1
    gpu_en = 0
elif host == 'yyyyyyyy':
    env = 2
    gpu_en = 0
else:
    env = 0
    gpu_en = 1
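The string to compare host against can be checked beforehand with a one-liner (on Colab this prints a random-looking name, which is why the else branch is used for Colab):

import socket
print(socket.gethostname())   # paste this value into the if/elif branches above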
if gpu_en == 1:
    from chainer import cuda, Variable
def __init__(self):
    super().__init__()
    if env == 0:
        self.modelfile = '/content/drive/My Drive/・ ・ ・/python-dlshogi/model/model_policy'
    elif env == 1:
        self.modelfile = r'/Users/・ ・ ・/python-dlshogi/model/model_policy' # policy network model created by training
    elif env == 2:
        self.modelfile = r"C:\Users\・ ・ ・\python-dlshogi\model\model_policy"
    self.model = None

And inside isready(), send the model to the GPU when it is enabled:

        if gpu_en == 1:
            self.model.to_gpu()
if gpu_en == 1:
    x = Variable(cuda.to_gpu(np.array([features], dtype=np.float32)))
elif gpu_en == 0:
    x = np.array([features], dtype=np.float32)

if gpu_en == 1:
    logits = cuda.to_cpu(y.data)[0]
    probabilities = cuda.to_cpu(F.softmax(y).data)[0]
elif gpu_en == 0:
    logits = y.data[0]
    probabilities = F.softmax(y).data[0]
Make it possible to switch between the greedy strategy and the softmax strategy. The way the book writes this was hard for me to follow, so I rewrote it. A small standalone comparison of the two strategies follows the snippet below.
#strategy
# 'greedy': greedy strategy
# 'boltzmann': softmax strategy
algorithm = 'boltzmann'

if algorithm == 'greedy':
    # (1) Pick the move with the highest probability (greedy strategy): simply return the index of the largest logit.
    selected_index = greedy(legal_logits)
elif algorithm == 'boltzmann':
    # (2) Pick a move according to its probability (softmax strategy): return a random index, biased toward high probabilities.
    selected_index = boltzmann(np.array(legal_logits, dtype=np.float32), 0.5)
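As a rough standalone comparison of the two strategies (toy logits, not the real network output; the greedy/boltzmann logic from policy_player.py is restated inline here):

import numpy as np

legal_logits = [2.0, 1.0, -1.0]   # toy logits for three legal moves

# Greedy strategy: always the index of the largest logit
greedy_index = legal_logits.index(max(legal_logits))   # -> 0

# Softmax (Boltzmann) strategy: sample an index with probability proportional to exp(logit / temperature)
t = 0.5
scaled = np.array(legal_logits, dtype=np.float32) / t
scaled -= scaled.max()
p = np.exp(scaled) / np.exp(scaled).sum()
boltzmann_index = np.random.choice(len(p), p=p)   # usually 0, sometimes 1, rarely 2

print(greedy_index, boltzmann_index, p)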
python-dlshogi\pydlshogi\player\policy_player.py
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#Environmental setting
#-----------------------------
import socket
host = socket.gethostname()
#Get the host name
# google colab :random
# iMac : xxxxxxxx
# Lenovo : yyyyyyyy
# env
# 0: google colab
# 1: iMac (no GPU)
# 2: Lenovo (no GPU)
# gpu_en
# 0: disable
# 1: enable
if host == 'xxxxxxxx':
    env = 1
    gpu_en = 0
elif host == 'yyyyyyyy':
    env = 2
    gpu_en = 0
else:
    env = 0
    gpu_en = 1
#strategy
# 'greedy': greedy strategy
# 'boltzmann': softmax strategy
algorithm = 'boltzmann'
#-----------------------------
import numpy as np
import chainer
from chainer import serializers
import chainer.functions as F
if gpu_en == 1:
    from chainer import cuda, Variable
import shogi
from pydlshogi.common import *
from pydlshogi.features import *
from pydlshogi.network.policy import *
from pydlshogi.player.base_player import *
def greedy(logits):
    # Return the index of the largest element of the list passed as the argument.
    # In a neural network, the logits are the values before passing through the activation function.
    return logits.index(max(logits))  # list.index returns the position within the list of the value given as its argument

def boltzmann(logits, temperature):
    logits /= temperature            # a /= b means a = a / b
    logits -= logits.max()           # a -= b means a = a - b; all values become <= 0 and the maximum becomes 0
    probabilities = np.exp(logits)   # exp of values <= 0, so each factor is in (0, 1]
    probabilities /= probabilities.sum()
    return np.random.choice(len(logits), p=probabilities)  # choice(n, p=b) returns a number from 0 to n-1, drawn with probabilities b
class PolicyPlayer(BasePlayer):
    def __init__(self):
        super().__init__()
        if env == 0:
            self.modelfile = '/content/drive/My Drive/・ ・ ・/python-dlshogi/model/model_policy'
        elif env == 1:
            self.modelfile = r'/Users/・ ・ ・/python-dlshogi/model/model_policy' # policy network model created by training
        elif env == 2:
            self.modelfile = r"C:\Users\・ ・ ・\python-dlshogi\model\model_policy"
        self.model = None

    def usi(self): # GUI side: sends the usi command after startup. Engine side: returns id (and option), then usiok.
        print('id name policy_player')
        print('option name modelfile type string default ' + self.modelfile)
        print('usiok')

    def setoption(self, option):
        if option[1] == 'modelfile':
            self.modelfile = option[3]

    def isready(self): # GUI side: sends the isready command before the game starts. Engine side: initializes and returns readyok.
        if self.model is None:
            self.model = PolicyNetwork()
            if gpu_en == 1:
                self.model.to_gpu()
        serializers.load_npz(self.modelfile, self.model)
        print('readyok')

    def go(self):
        if self.board.is_game_over():
            print('bestmove resign')
            return

        features = make_input_features_from_board(self.board)
        if gpu_en == 1:
            x = Variable(cuda.to_gpu(np.array([features], dtype=np.float32)))
        elif gpu_en == 0:
            x = np.array([features], dtype=np.float32)

        with chainer.no_backprop_mode():
            y = self.model(x)
            if gpu_en == 1:
                logits = cuda.to_cpu(y.data)[0]
                probabilities = cuda.to_cpu(F.softmax(y).data)[0]
            elif gpu_en == 0:
                logits = y.data[0] # values before the activation function; take out the first (and only) batch element, as shown below
                # y.data is    [[-4.137782 0.12063725 -4.907426 ... -5.663455 -6.104148 -7.8398824 ]]
                # y.data[0] is [-4.137782 0.12063725 -4.907426 ... -5.663455 -6.104148 -7.8398824 ]
                # Incidentally, the number of elements of y.data[0] is (20 + 7) * 9 * 9 = 2187
                probabilities = F.softmax(y).data[0]
                # probabilities is [1.3974859e-04 9.8799672e-03 6.4728469e-05 ... 3.0391777e-05 1.9559853e-05 3.4478303e-06]

        # For all legal moves
        legal_moves = []
        legal_logits = []
        for move in self.board.legal_moves:
            # Convert the move to a label
            label = make_output_label(move, self.board.turn) # label encodes the 27 (move directions + pieces in hand) and the 9x9 destination square
            # Store the legal move and its logit
            legal_moves.append(move)
            legal_logits.append(logits[label]) # label is the index of this move; append its logit to legal_logits
            # Show the probability
            print('info string {:5} : {:.5f}'.format(move.usi(), probabilities[label]))

        if algorithm == 'greedy':
            # (1) Pick the move with the highest probability (greedy strategy): simply return the index of the largest logit.
            selected_index = greedy(legal_logits)
        elif algorithm == 'boltzmann':
            # (2) Pick a move according to its probability (softmax strategy): return a random index, biased toward high probabilities.
            selected_index = boltzmann(np.array(legal_logits, dtype=np.float32), 0.5)

        bestmove = legal_moves[selected_index]
        print('bestmove', bestmove.usi())
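To see how these methods fit together without a GUI, the player can also be driven by hand; a rough usage sketch (it assumes the trained model file exists at the path in self.modelfile, and it sets self.board directly instead of going through the USI position command handled by BasePlayer):

import shogi
from pydlshogi.player.policy_player import PolicyPlayer

player = PolicyPlayer()
player.usi()                  # prints id / option / usiok
player.isready()              # builds PolicyNetwork, loads the model file, prints readyok
player.board = shogi.Board()  # initial position; normally set via the USI 'position' command
player.go()                   # prints one info string per legal move, then 'bestmove ...'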
2g2f (pawn to 2f) 0.48551, 7g7f (pawn to 7f) 0.40747. The engine played 2g2f. That looks fine.
This time it played 7g7f. The softmax strategy does indeed pick randomly among the high-probability moves. No problem.
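The amount of randomness comes from the temperature passed to boltzmann (0.5 in go() above). A small standalone check of how often each of three toy moves gets picked at that temperature (the boltzmann rule is restated here so the snippet runs on its own):

import numpy as np

def boltzmann(logits, temperature):
    logits = logits / temperature     # a fresh array, so the caller's logits are untouched
    logits -= logits.max()
    p = np.exp(logits)
    p /= p.sum()
    return np.random.choice(len(logits), p=p)

legal_logits = np.array([2.0, 1.7, 0.0], dtype=np.float32)  # toy logits for three moves
counts = np.bincount([boltzmann(legal_logits, 0.5) for _ in range(10000)], minlength=3)
print(counts / 10000)   # roughly [0.64, 0.35, 0.01]: the two strong moves share almost all the picks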