Même si je dis que je l'ai reproduit, il ne s'agit que de la partie des fonctions que je connais. Je n'ai pas encore vraiment commencé à étudier l'apprentissage profond : j'ai lu « Deep Learning from Scratch » d'O'Reilly et j'en ai réalisé une implémentation en Python. J'ai ensuite écrit en C# quelque chose qui fonctionne de la même manière. C# n'a pas de bibliothèque comme NumPy (quelqu'un en a peut-être créé une, si vous cherchez), j'ai donc dû implémenter moi-même les opérations nécessaires.
J'ai déjà publié un article comme celui-ci.
« Premier apprentissage profond en C# – Imitation de l'implémentation en Python »
À l'époque, je n'avais rendu possible qu'un calcul matriciel simple ; cette fois, je l'ai un peu amélioré.
Cela peut être difficile à comprendre pour le moment, mais je vais d'abord vous présenter le programme d'apprentissage profond en Python que j'ai créé à cette occasion.
deeplearning.py
import numpy as np
import matplotlib.pylab as plt
def sigmoid(x):
    """Return the logistic sigmoid 1 / (1 + exp(-x)) of ``x``.

    ``x`` may be a scalar or a NumPy array; the operation is element-wise.
    """
    denominator = 1 + np.exp(-x)
    return 1 / denominator
def ident(x):
    """Identity activation: return the argument unchanged."""
    result = x
    return result
def numerical_gradient(f, x):
    """Estimate the gradient of ``f`` at ``x`` by central differences.

    Each element of ``x`` is perturbed in place by +h and -h, ``f(x)`` is
    evaluated for both, and the element is restored afterwards.

    Args:
        f: callable taking ``x`` and returning a scalar.
        x: NumPy array; modified temporarily, restored before returning.

    Returns:
        Array shaped like ``x`` holding the estimated partial derivatives.
    """
    step = 1e-4  # 0.0001
    gradient = np.zeros_like(x)

    walker = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    for _ in walker:
        position = walker.multi_index
        saved = x[position]

        x[position] = saved + step
        upper = f(x)  # f(x+h)

        x[position] = saved - step
        lower = f(x)  # f(x-h)

        gradient[position] = (upper - lower) / (2 * step)
        x[position] = saved  # restore the original value

    return gradient
def softmax(a):
    """Return the softmax of ``a``.

    The maximum of ``a`` is subtracted before exponentiating so that the
    exponentials cannot overflow; the result is unchanged mathematically.
    """
    shifted = a - np.max(a)
    exponentials = np.exp(shifted)
    return exponentials / np.sum(exponentials)
def diff(f, x):
    """Approximate the derivative of ``f`` at ``x`` by a central difference.

    Computes ``(f(x + h) - f(x - h)) / (2 * h)`` with ``h = 1e-4``.

    Args:
        f: callable of one argument.
        x: point at which to differentiate.

    Returns:
        The central-difference estimate of ``f'(x)``.
    """
    h = 1e-4
    # The whole difference must be divided by 2h; previously only the
    # f(x - h) term was divided because of a misplaced parenthesis.
    return (f(x + h) - f(x - h)) / (2 * h)
def cross_etp_err(y, t):
    """Return the cross-entropy error between prediction ``y`` and target ``t``.

    A small constant is added to ``y`` before taking the logarithm so that
    ``log(0)`` is never evaluated.
    """
    epsilon = 1e-7
    log_prediction = np.log(y + epsilon)
    return -np.sum(t * log_prediction)
class testnet:
    """Small fixed network: 2 inputs -> 3 units -> 2 units -> 2 outputs.

    All weights and biases start at zero. The input ``X`` and the target
    ``T`` are stored on the instance, so ``loss()`` takes no arguments.
    """

    def __init__(self):
        # Fixed input sample and its target vector.
        self.X = np.array([1.0, 0.5])
        self.T = np.array([0, 1])

        # Layer 1: (2, 3) weights and 3 biases.
        self.W1 = np.zeros((2, 3))
        self.B1 = np.zeros(3)

        # Layer 2: (3, 2) weights and 2 biases.
        self.W2 = np.zeros((3, 2))
        self.B2 = np.zeros(2)

        # Layer 3: (2, 2) weights and 2 biases.
        self.W3 = np.zeros((2, 2))
        self.B3 = np.zeros(2)

        # Gradient buffers, one per parameter array, same shape as it.
        self.w1_grad = np.zeros_like(self.W1)
        self.b1_grad = np.zeros_like(self.B1)
        self.w2_grad = np.zeros_like(self.W2)
        self.b2_grad = np.zeros_like(self.B2)
        self.w3_grad = np.zeros_like(self.W3)
        self.b3_grad = np.zeros_like(self.B3)

    def loss(self):
        """Run the forward pass on ``X`` and return the cross-entropy vs ``T``."""
        hidden1 = sigmoid(np.dot(self.X, self.W1) + self.B1)
        hidden2 = sigmoid(np.dot(hidden1, self.W2) + self.B2)
        scores = np.dot(hidden2, self.W3) + self.B3
        prediction = softmax(scores)
        return cross_etp_err(prediction, self.T)
############################
# Single network instance shared by the training helpers below.
net = testnet()


def f(W):
    """Loss of the shared ``net``.

    ``W`` is not read here: ``numerical_gradient`` perturbs the parameter
    arrays of ``net`` in place, so ``net.loss()`` already sees the change.
    """
    return net.loss()


# Step size applied to every gradient-descent update.
rate = 0.1
def deep_learning(net):
    """Perform one gradient-descent step on every parameter of ``net``.

    All six gradients are computed first (against the not-yet-updated
    parameters); only then are the parameters moved by ``rate`` times
    their gradient.
    """
    pairs = (
        ("W1", "w1_grad"),
        ("B1", "b1_grad"),
        ("W2", "w2_grad"),
        ("B2", "b2_grad"),
        ("W3", "w3_grad"),
        ("B3", "b3_grad"),
    )

    # Phase 1: numerical gradient of the loss for each parameter array.
    for param_name, grad_name in pairs:
        setattr(net, grad_name, numerical_gradient(f, getattr(net, param_name)))

    # Phase 2: in-place update, equivalent to ``net.<param> -= rate * grad``.
    for param_name, grad_name in pairs:
        param = getattr(net, param_name)
        param -= rate * getattr(net, grad_name)
        setattr(net, param_name, param)
# Run 100 training steps, printing the first output bias after each one.
loop = 0
while loop < 100:
    deep_learning(net)
    print(loop, net.B3[0], sep=":")
    loop += 1
deeplearning.cs
using System.Linq;
namespace Matrix
{
/// <summary>
/// Minimal dense matrix backed by a jagged <c>double[][]</c>, with element-wise
/// arithmetic operators, a matrix product and a few neural-network helpers.
/// </summary>
/// <remarks>
/// Invalid input does not throw: a constructor argument that is null, empty or
/// ragged sets <see cref="Err"/>, and an operator called with mismatched shapes
/// returns a placeholder of <c>{ 0, 0 }</c> rows (one per row of the left operand).
/// <see cref="sigmoid"/>, <see cref="softmax"/> and <see cref="cross_etp_err"/>
/// read row 0 only.
/// </remarks>
class Mat
{
    private static readonly System.Func<double, double, double> Add = (a, b) => a + b;
    private static readonly System.Func<double, double, double> Subtract = (a, b) => a - b;
    private static readonly System.Func<double, double, double> Multiply = (a, b) => a * b;
    private static readonly System.Func<double, double, double> Divide = (a, b) => a / b;

    private int r = 0;

    /// <summary>Row count recorded by the constructor (0 when <see cref="Err"/> is set).</summary>
    public int R
    {
        get { return r; }
    }

    private int c = 0;

    /// <summary>Column count recorded by the constructor (0 when <see cref="Err"/> is set).</summary>
    public int C
    {
        get { return c; }
    }

    private bool err = false;

    /// <summary>True when the constructor rejected its argument.</summary>
    public bool Err
    {
        get { return err; }
    }

    private double[][] matrix_data;

    /// <summary>
    /// Gets or sets the backing rows. While <see cref="Err"/> is set the getter
    /// returns a fresh 2x2 zero matrix instead of the stored data.
    /// </summary>
    /// <remarks>The setter stores the reference only; <see cref="R"/> and <see cref="C"/> are not recomputed.</remarks>
    public double[][] Matrix_data
    {
        get
        {
            if (err)
            {
                return new double[][] { new double[] { 0, 0 }, new double[] { 0, 0 } };
            }

            return matrix_data;
        }

        set
        {
            matrix_data = value;
        }
    }

    /// <summary>Gets a newly allocated R x C array filled with zeros.</summary>
    public double[][] Zero_matrix
    {
        get
        {
            double[][] zm = new double[this.r][];
            for (int i = 0; i < this.r; i++)
            {
                // New arrays are zero-initialised by the runtime.
                zm[i] = new double[this.c];
            }

            return zm;
        }
    }

    /// <summary>
    /// Builds a matrix from the given rows. The array is stored by reference, not copied.
    /// </summary>
    /// <param name="vs">Rows of equal length. A null, empty or ragged argument, or a null row, sets <see cref="Err"/>.</param>
    public Mat(params double[][] vs)
    {
        // Previously an empty or null argument crashed on vs[0]; flag it instead,
        // consistently with how ragged input is reported.
        if (vs == null || vs.Length == 0 || vs[0] == null)
        {
            err = true;
            return;
        }

        int len = vs[0].Length;
        for (int i = 1; i < vs.Length; i++)
        {
            if (vs[i] == null || vs[i].Length != len)
            {
                err = true;
                return;
            }
        }

        r = vs.Length;
        c = len;
        matrix_data = vs;
    }

    /// <summary>Applies the logistic function 1 / (1 + e^(-x)) to each element of row 0.</summary>
    /// <returns>A 1 x C array.</returns>
    public double[][] sigmoid()
    {
        double[][] sig = new double[1][];
        sig[0] = new double[this.c];
        for (int i = 0; i < this.c; i++)
        {
            // The exponent must be negated; exp(+x) yields 1 - sigmoid(x).
            sig[0][i] = 1 / (1 + System.Math.Exp(-this.matrix_data[0][i]));
        }

        return sig;
    }

    /// <summary>Computes the softmax of row 0, subtracting the row maximum before exponentiating.</summary>
    /// <returns>A 1 x C array whose entries sum to 1.</returns>
    public double[][] softmax()
    {
        double[] row = this.matrix_data[0];
        double max = row.Max();

        double[] exp_a = new double[this.c];
        double sum = 0.0;
        for (int i = 0; i < this.c; i++)
        {
            exp_a[i] = System.Math.Exp(row[i] - max);
            sum = sum + exp_a[i];
        }

        double[][] sm = new double[1][];
        sm[0] = new double[this.c];
        for (int i = 0; i < this.c; i++)
        {
            sm[0][i] = exp_a[i] / sum;
        }

        return sm;
    }

    /// <summary>Cross-entropy error of row 0 of this matrix against row 0 of <paramref name="t"/>.</summary>
    /// <param name="t">Target values; read directly from its stored rows.</param>
    /// <returns>-sum(t[i] * log(y[i] + 1e-7)) over the C columns of this matrix.</returns>
    public double cross_etp_err(Mat t)
    {
        double delta = 0.0000001; // keeps Log away from 0
        double sum = 0.0;
        for (int i = 0; i < this.c; i++)
        {
            sum = sum + t.matrix_data[0][i] * System.Math.Log(this.matrix_data[0][i] + delta);
        }

        return -sum;
    }

    /// <summary>
    /// Central-difference gradient of <paramref name="loss"/> with respect to every element of this matrix.
    /// </summary>
    /// <param name="loss">Evaluated twice per element, after the element was moved by +h and by -h in place.</param>
    /// <returns>An R x C array of estimated partial derivatives. Each element is restored after use.</returns>
    public double[][] numerical_gradient(System.Func<double> loss)
    {
        double h = 0.0001;
        double[][] grad = new double[this.r][];
        for (int i = 0; i < this.r; i++)
        {
            grad[i] = new double[this.c];
            for (int j = 0; j < this.c; j++)
            {
                double tmp_val = this.matrix_data[i][j];

                this.matrix_data[i][j] = tmp_val + h;
                double fxh1 = loss();

                this.matrix_data[i][j] = tmp_val - h;
                double fxh2 = loss();

                grad[i][j] = (fxh1 - fxh2) / (2 * h);
                this.matrix_data[i][j] = tmp_val;
            }
        }

        return grad;
    }

    // Placeholder returned when operand shapes do not match: `rows` rows of { 0, 0 }.
    private static double[][] MismatchFallback(int rows)
    {
        double[][] d = new double[rows][];
        for (int k = 0; k < rows; k++)
        {
            d[k] = new double[2] { 0, 0 };
        }

        return d;
    }

    // Applies op element by element over a rows x cols grid, or returns the placeholder when shapes differ.
    private static double[][] CombineCore(double[][] left, double[][] right, int rows, int cols, bool sameShape, System.Func<double, double, double> op)
    {
        if (!sameShape)
        {
            return MismatchFallback(rows);
        }

        double[][] d = new double[rows][];
        for (int i = 0; i < rows; i++)
        {
            d[i] = new double[cols];
            for (int j = 0; j < cols; j++)
            {
                d[i][j] = op(left[i][j], right[i][j]);
            }
        }

        return d;
    }

    // Mat (op) Mat: shapes are compared through the recorded R and C.
    private static double[][] Combine(Mat p1, Mat p2, System.Func<double, double, double> op)
    {
        bool sameShape = p1.C == p2.C && p1.R == p2.R;
        return CombineCore(p1.Matrix_data, p2.Matrix_data, p1.R, p1.C, sameShape, op);
    }

    // double[][] (op) Mat: the raw array supplies the result dimensions.
    private static double[][] Combine(double[][] p1, Mat p2, System.Func<double, double, double> op)
    {
        bool sameShape = p1[0].Length == p2.C && p1.Length == p2.R;
        return CombineCore(p1, p2.Matrix_data, p1.Length, p1[0].Length, sameShape, op);
    }

    // Mat (op) double[][]: the Mat supplies the result dimensions.
    private static double[][] Combine(Mat p1, double[][] p2, System.Func<double, double, double> op)
    {
        bool sameShape = p1.C == p2[0].Length && p1.R == p2.Length;
        return CombineCore(p1.Matrix_data, p2, p1.R, p1.C, sameShape, op);
    }

    // Applies op to every element of m and returns a new R x C array.
    private static double[][] Map(Mat m, System.Func<double, double> op)
    {
        double[][] source = m.Matrix_data;
        double[][] d = new double[m.R][];
        for (int i = 0; i < m.R; i++)
        {
            d[i] = new double[m.C];
            for (int j = 0; j < m.C; j++)
            {
                d[i][j] = op(source[i][j]);
            }
        }

        return d;
    }

    // Operator overloads. Matrix/matrix forms are element-wise; scalar forms apply the scalar to every element.

    /// <summary>Element-wise sum.</summary>
    public static double[][] operator +(Mat p1, Mat p2)
    {
        return Combine(p1, p2, Add);
    }

    /// <summary>Element-wise sum.</summary>
    public static double[][] operator +(double[][] p1, Mat p2)
    {
        return Combine(p1, p2, Add);
    }

    /// <summary>Element-wise sum.</summary>
    public static double[][] operator +(Mat p1, double[][] p2)
    {
        return Combine(p1, p2, Add);
    }

    /// <summary>Adds a scalar to every element.</summary>
    public static double[][] operator +(double p1, Mat p2)
    {
        return Map(p2, v => v + p1);
    }

    /// <summary>Adds a scalar to every element.</summary>
    public static double[][] operator +(Mat p1, double p2)
    {
        return Map(p1, v => v + p2);
    }

    /// <summary>Element-wise difference.</summary>
    public static double[][] operator -(Mat p1, Mat p2)
    {
        return Combine(p1, p2, Subtract);
    }

    /// <summary>Element-wise difference.</summary>
    public static double[][] operator -(double[][] p1, Mat p2)
    {
        return Combine(p1, p2, Subtract);
    }

    /// <summary>Element-wise difference.</summary>
    public static double[][] operator -(Mat p1, double[][] p2)
    {
        return Combine(p1, p2, Subtract);
    }

    /// <summary>Subtracts every element from a scalar.</summary>
    public static double[][] operator -(double p1, Mat p2)
    {
        return Map(p2, v => p1 - v);
    }

    /// <summary>Subtracts a scalar from every element.</summary>
    public static double[][] operator -(Mat p1, double p2)
    {
        return Map(p1, v => v - p2);
    }

    /// <summary>Element-wise (Hadamard) product; see <see cref="dot"/> for the matrix product.</summary>
    public static double[][] operator *(Mat p1, Mat p2)
    {
        return Combine(p1, p2, Multiply);
    }

    /// <summary>Element-wise product.</summary>
    public static double[][] operator *(double[][] p1, Mat p2)
    {
        return Combine(p1, p2, Multiply);
    }

    /// <summary>Element-wise product.</summary>
    public static double[][] operator *(Mat p1, double[][] p2)
    {
        return Combine(p1, p2, Multiply);
    }

    /// <summary>Multiplies every element by a scalar.</summary>
    public static double[][] operator *(double p1, Mat p2)
    {
        return Map(p2, v => p1 * v);
    }

    /// <summary>Multiplies every element by a scalar.</summary>
    public static double[][] operator *(Mat p1, double p2)
    {
        return Map(p1, v => v * p2);
    }

    /// <summary>Element-wise quotient.</summary>
    public static double[][] operator /(Mat p1, Mat p2)
    {
        return Combine(p1, p2, Divide);
    }

    /// <summary>Element-wise quotient.</summary>
    public static double[][] operator /(double[][] p1, Mat p2)
    {
        return Combine(p1, p2, Divide);
    }

    /// <summary>Element-wise quotient.</summary>
    public static double[][] operator /(Mat p1, double[][] p2)
    {
        return Combine(p1, p2, Divide);
    }

    /// <summary>Divides a scalar by every element.</summary>
    public static double[][] operator /(double p1, Mat p2)
    {
        return Map(p2, v => p1 / v);
    }

    /// <summary>Divides every element by a scalar.</summary>
    public static double[][] operator /(Mat p1, double p2)
    {
        return Map(p1, v => v / p2);
    }

    /// <summary>Matrix product of <paramref name="p1"/> (R1 x C1) and <paramref name="p2"/> (R2 x C2).</summary>
    /// <returns>
    /// An R1 x C2 array when C1 equals R2; otherwise R1 placeholder rows of <c>{ 0, 0 }</c>.
    /// </returns>
    public static double[][] dot(Mat p1, Mat p2)
    {
        if (p1.C != p2.R)
        {
            return MismatchFallback(p1.R);
        }

        double[][] left = p1.Matrix_data;
        double[][] right = p2.Matrix_data;
        double[][] d = new double[p1.R][];
        for (int i = 0; i < p1.R; i++)
        {
            d[i] = new double[p2.C];
            for (int j = 0; j < p2.C; j++)
            {
                double sum = 0.0;
                for (int a = 0; a < p1.C; a++)
                {
                    sum = sum + left[i][a] * right[a][j];
                }

                d[i][j] = sum;
            }
        }

        return d;
    }
}
}
Puisqu'il est rassemblé dans une classe, il peut également être utilisé comme bibliothèque.
main.cs
namespace Matrix
{
/// <summary>
/// Trains a fixed 2-3-2-2 network on a single sample by numerical gradient
/// descent and prints the first output bias after every step.
/// </summary>
class Program
{
    // Input sample, per-layer weights and biases (all starting at zero), and the target vector.
    static Mat X = new Mat(
        new double[] { 1.0, 0.5 }
        ),
    W1 = new Mat(
        new double[] { 0.0, 0.0, 0.0 },
        new double[] { 0.0, 0.0, 0.0 }
        ),
    B1 = new Mat(
        new double[] { 0.0, 0.0, 0.0 }
        ),
    W2 = new Mat(
        new double[] { 0.0, 0.0 },
        new double[] { 0.0, 0.0 },
        new double[] { 0.0, 0.0 }
        ),
    B2 = new Mat(
        new double[] { 0.0, 0.0 }
        ),
    W3 = new Mat(
        new double[] { 0.0, 0.0 },
        new double[] { 0.0, 0.0 }
        ),
    B3 = new Mat(
        new double[] { 0.0, 0.0 }
        ),
    T = new Mat(
        new double[] { 0, 1 }
        );

    /// <summary>Runs the forward pass on <c>X</c> and returns the cross-entropy error against <c>T</c>.</summary>
    /// <remarks>
    /// Called twice per parameter element by <c>numerical_gradient</c>, so it avoids
    /// any work whose result is not used.
    /// </remarks>
    static double loss()
    {
        // Each layer's result is wrapped directly instead of overwriting a placeholder matrix.
        Mat A1 = new Mat(Mat.dot(X, W1) + B1);
        Mat Z1 = new Mat(A1.sigmoid());
        Mat A2 = new Mat(Mat.dot(Z1, W2) + B2);
        Mat Z2 = new Mat(A2.sigmoid());
        Mat A3 = new Mat(Mat.dot(Z2, W3) + B3);
        Mat Y = new Mat(A3.softmax());
        return Y.cross_etp_err(T);
    }

    /// <summary>Performs 100 gradient-descent steps with a learning rate of 0.1.</summary>
    /// <param name="args">Unused.</param>
    static void Main(string[] args)
    {
        double rate = 0.1;
        Mat[] parameters = { W1, B1, W2, B2, W3, B3 };
        double[][][] gradients = new double[parameters.Length][][];

        for (int i = 0; i < 100; i++)
        {
            // All gradients are taken against the current parameters before any of them is updated.
            for (int k = 0; k < parameters.Length; k++)
            {
                gradients[k] = parameters[k].numerical_gradient(loss);
            }

            for (int k = 0; k < parameters.Length; k++)
            {
                parameters[k].Matrix_data = parameters[k] - (rate * new Mat(gradients[k]));
            }

            System.Console.WriteLine(i.ToString() + ":" + B3.Matrix_data[0][0].ToString());
        }
    }
}
}
Le constructeur de la classe Mat accepte des arguments de type `params double[]` ou `double[][]`.
Vous pouvez additionner, soustraire, multiplier et diviser entre des `Mat`, des nombres réels et des `double[][]`.
De plus, il est possible de calculer la fonction softmax, la fonction sigmoïde, le gradient numérique, l'erreur d'entropie croisée, etc.
Recommended Posts