Apprentissage profond avec C #!

Reproduction de la bibliothèque numpy de Python en C

Même si je dis que je l'ai reproduit, ce n'est qu'une partie de la fonction que je connais. Je n'ai pas encore commencé à étudier l'apprentissage profond, J'ai lu "Make from scratch Deep Learning" d'O'Reilly et je l'ai fait avec Python. J'ai fait quelque chose en C # qui fonctionne de la même manière. C # n'a pas de bibliothèque comme Numpy (quelqu'un pourrait en avoir créé une si vous la cherchez), vous allez donc l'implémenter vous-même.

J'ai déjà publié un article comme celui-ci.

Premier apprentissage profond en C # -Imitation de l'implémentation en Python-

A cette époque, j'ai permis de réaliser un calcul matriciel simple, mais cette fois, je l'ai un peu amélioré.

Pour le moment, cela peut être difficile à comprendre, mais je vais vous présenter le programme d'apprentissage en profondeur Python que j'ai créé dans cette situation.

Programme réalisé avec Python

`deeplearning.py`


import numpy as np
import matplotlib.pylab as plt

def sigmoid(x):
    return 1 / (1+np.exp(-x))

def ident(x):
    return x

def numerical_gradient(f, x):
    h = 1e-4 # 0.0001
    grad = np.zeros_like(x)
    
    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        idx = it.multi_index
        tmp_val = x[idx]
        x[idx] = tmp_val + h
        fxh1 = f(x) # f(x+h)
        
        x[idx] = tmp_val - h 
        fxh2 = f(x) # f(x-h)
        grad[idx] = (fxh1 - fxh2) / (2*h)
        
        x[idx] = tmp_val #Restaurer la valeur
        it.iternext()   
        
    return grad

def softmax(a):
    c=np.max(a)
    exp_a=np.exp(a-c)
    sum_exp_a=np.sum(exp_a)
    return exp_a/sum_exp_a

def diff(f,x):
    h=1e-4
    return (f(x+h)-f(x-h)/(2*h))

def cross_etp_err(y,t):
    delta=1e-7
    return -np.sum(t*np.log(y+delta))

class testnet:
    def __init__(self):
#-----------------------------------------------------
        self.X=np.array([1.0,0.5])
        self.W1=np.array([[0.0,0.0,0.0],[0.0,0.0,0.0]])
        self.B1=np.array([0.0,0.0,0.0])
#-----------------------------------------------------
        self.W2=np.array([[0.0,0.0],[0.0,0.0],[0.0,0.0]])
        self.B2=np.array([0.0,0.0])
#-----------------------------------------------------
        self.W3=np.array([[0.0,0.0],[0.0,0.0]])
        self.B3=np.array([0.0,0.0])
#-----------------------------------------------------
        self.T=np.array([0,1])

        self.w1_grad=np.zeros_like(self.W1)
        self.b1_grad=np.zeros_like(self.B1)
        self.w2_grad=np.zeros_like(self.W2)
        self.b2_grad=np.zeros_like(self.B2)
        self.w3_grad=np.zeros_like(self.W3)
        self.b3_grad=np.zeros_like(self.B3)
############################
    def loss(self):
        A1=np.dot(self.X,self.W1)+self.B1
        Z1=sigmoid(A1)
        A2=np.dot(Z1,self.W2)+self.B2
        Z2=sigmoid(A2)
        A3=np.dot(Z2,self.W3)+self.B3
        Y=softmax(A3)
        return cross_etp_err(Y,self.T)
############################
net = testnet()

def f(W):
    return net.loss()

rate=0.1

def deep_learning(net):
    net.w1_grad=numerical_gradient(f,net.W1)
    net.b1_grad=numerical_gradient(f,net.B1) 
    net.w2_grad=numerical_gradient(f,net.W2)
    net.b2_grad=numerical_gradient(f,net.B2)
    net.w3_grad=numerical_gradient(f,net.W3)
    net.b3_grad=numerical_gradient(f,net.B3)

    
    net.W1-=rate*net.w1_grad
    net.B1-=rate*net.b1_grad
    net.W2-=rate*net.w2_grad
    net.B2-=rate*net.b2_grad
    net.W3-=rate*net.w3_grad
    net.B3-=rate*net.b3_grad

loop=0;
while loop<100:
    deep_learning(net)
    print(str(loop)+":"+str(net.B3[0]))
    loop+=1

Imitez Numpy et implémentez-le en C

`deeplearning.cs`


using System.Linq;

namespace Matrix
{
    class Mat
    {
        private int r = 0;
        public int R
        {
            get { return r; }
        }
        private int c = 0;
        public int C
        {
            get { return c; }
        }
        private bool err = false;
        public bool Err
        {
            get { return err; }
        }
        private double[][] matrix_data;
        public double[][] Matrix_data
        {
            get {
                double[][] a = new double[2][];
                a[0] = new double[] { 0, 0 };
                a[1] = new double[] { 0, 0 };
                if (err) return a;
                else return matrix_data;
            }
            set
            {
                matrix_data = value;
            }
        }
        public double[][] Zero_matrix
        {
            get
            {
                double[][] zm = new double[this.r][];
                for (int i = 0; i < this.r; i++)
                {
                    zm[i] = new double[this.c];
                    for (int j = 0; j < this.c; j++)
                    {
                        zm[i][j] = 0;
                    }
                }
                return zm;
            }
        }
        public Mat(params double[][] vs)
        {
            int len = vs[0].Length;

            for (int i = 0; i < vs.Length; i++)
            {
                if (i != 0 && len != vs[i].Length)
                {
                    err = true;
                }
            }
            if (!err)
            {
                r = vs.Length;
                c = vs[0].Length;
                matrix_data = vs;
            }
        }
        public double[][] sigmoid()
        {
            double[][] sig = new double[1][];
            sig[0] = new double[this.c];

            for(int i = 0; i < this.c; i++)
            {
                sig[0][i] = 1 / (1 + System.Math.Exp(this.matrix_data[0][i]));
            }

            return sig;
        }
        public double[][] softmax()
        {
            double[][] sm = new double[1][];
            sm[0] = new double[this.c];

            double m = this.matrix_data[0].Max();

            double[] exp_a = new double[this.c];
            for (int i = 0; i < this.c; i++)
            {
                exp_a[i] = System.Math.Exp(this.matrix_data[0][i] - m);
            }

            double sum = 0.0;
            for (int i = 0; i < this.c; i++)
            {
                sum = sum + exp_a[i];
            }

            for (int i = 0; i < this.c; i++)
            {
                sm[0][i] = exp_a[i] / sum;
            }

            return sm;
        }
        public double cross_etp_err(Mat t)
        {
            double delta = 0.0000001;
            double sum = 0.0;
            for (int i = 0; i < this.c; i++)
            {
                sum = sum + t.matrix_data[0][i] * System.Math.Log(this.matrix_data[0][i] + delta);
            }

            return -sum;
        }
        public double[][] numerical_gradient(System.Func<double> loss)
        {
            double h = 0.0001;
            double[][] grad = new double[this.r][];
            double tmp_val = 0.0;
            double fxh1 = 0.0;
            double fxh2 = 0.0;

            for(int i = 0; i < this.r; i++)
            {
                grad[i] = new double[this.c];
                for(int j = 0; j < this.c; j++)
                {
                    tmp_val = this.matrix_data[i][j];
                    this.matrix_data[i][j] = tmp_val + h;
                    fxh1 = loss();

                    this.matrix_data[i][j] = tmp_val - h;
                    fxh2 = loss();

                    grad[i][j] = (fxh1 - fxh2) / (2 * h);
                    this.matrix_data[i][j] = tmp_val;
                }
            }

            return grad;
        }
        //Sous la surcharge de l'opérateur
        public static double[][] operator +(Mat p1, Mat p2)
        {
            double[][] d = new double[p1.R][];

            if (p1.C == p2.C && p1.R == p2.R)
            {
                for (int i = 0; i < p1.R; i++)
                {
                    d[i] = new double[p1.C];
                    for (int j = 0; j < p1.C; j++)
                    {
                        d[i][j] = p1.Matrix_data[i][j] + p2.Matrix_data[i][j];
                    }
                }
            }
            else
            {
                for (int k = 0; k < p1.R; k++)
                {
                    d[k] = new double[2] { 0, 0 };
                }
            }

            return d;
        }
        public static double[][] operator +(double[][] p1, Mat p2)
        {
            double[][] d = new double[p1.Length][];

            if (p1[0].Length == p2.C && p1.Length == p2.R)
            {
                for (int i = 0; i < p1.Length; i++)
                {
                    d[i] = new double[p1[0].Length];
                    for (int j = 0; j < p1[0].Length; j++)
                    {
                        d[i][j] = p1[i][j] + p2.Matrix_data[i][j];
                    }
                }
            }
            else
            {
                for (int k = 0; k < p1.Length; k++)
                {
                    d[k] = new double[2] { 0, 0 };
                }
            }

            return d;
        }
        public static double[][] operator +(Mat p1, double[][] p2)
        {
            double[][] d = new double[p1.R][];

            if (p1.C == p2[0].Length && p1.R == p2.Length)
            {
                for (int i = 0; i < p1.R; i++)
                {
                    d[i] = new double[p1.C];
                    for (int j = 0; j < p1.C; j++)
                    {
                        d[i][j] = p1.Matrix_data[i][j] + p2[i][j];
                    }
                }
            }
            else
            {
                for (int k = 0; k < p1.R; k++)
                {
                    d[k] = new double[2] { 0, 0 };
                }
            }

            return d;
        }
        public static double[][] operator +(double p1, Mat p2)
        {
            double[][] d = new double[p2.R][];
            for (int i = 0; i < p2.R; i++)
            {
                d[i] = new double[p2.C];
                for (int j = 0; j < p2.C; j++)
                {
                    d[i][j] = p2.Matrix_data[i][j] + p1;
                }
            }

            return d;
        }
        public static double[][] operator +(Mat p1, double p2)
        {
            double[][] d = new double[p1.R][];
            for (int i = 0; i < p1.R; i++)
            {
                d[i] = new double[p1.C];
                for (int j = 0; j < p1.C; j++)
                {
                    d[i][j] = p1.Matrix_data[i][j] + p2;
                }
            }

            return d;
        }
        public static double[][] operator -(Mat p1, Mat p2)
        {
            double[][] d = new double[p1.R][];

            if (p1.C == p2.C && p1.R == p2.R)
            {
                for (int i = 0; i < p1.R; i++)
                {
                    d[i] = new double[p1.C];
                    for (int j = 0; j < p1.C; j++)
                    {
                        d[i][j] = p1.Matrix_data[i][j] - p2.Matrix_data[i][j];
                    }
                }
            }
            else
            {
                for (int k = 0; k < p1.R; k++)
                {
                    d[k] = new double[2] { 0, 0 };
                }
            }

            return d;
        }
        public static double[][] operator -(double[][] p1, Mat p2)
        {
            double[][] d = new double[p1.Length][];

            if (p1[0].Length == p2.C && p1.Length == p2.R)
            {
                for (int i = 0; i < p1.Length; i++)
                {
                    d[i] = new double[p1[0].Length];
                    for (int j = 0; j < p1[0].Length; j++)
                    {
                        d[i][j] = p1[i][j] - p2.Matrix_data[i][j];
                    }
                }
            }
            else
            {
                for (int k = 0; k < p1.Length; k++)
                {
                    d[k] = new double[2] { 0, 0 };
                }
            }

            return d;
        }
        public static double[][] operator -(Mat p1, double[][] p2)
        {
            double[][] d = new double[p1.R][];

            if (p1.C == p2[0].Length && p1.R == p2.Length)
            {
                for (int i = 0; i < p1.R; i++)
                {
                    d[i] = new double[p1.C];
                    for (int j = 0; j < p1.C; j++)
                    {
                        d[i][j] = p1.Matrix_data[i][j] - p2[i][j];
                    }
                }
            }
            else
            {
                for (int k = 0; k < p1.R; k++)
                {
                    d[k] = new double[2] { 0, 0 };
                }
            }

            return d;
        }

        public static double[][] operator -(double p1, Mat p2)
        {
            double[][] d = new double[p2.R][];
            for (int i = 0; i < p2.R; i++)
            {
                d[i] = new double[p2.C];
                for (int j = 0; j < p2.C; j++)
                {
                    d[i][j] = p1 - p2.Matrix_data[i][j];
                }
            }

            return d;
        }
        public static double[][] operator -(Mat p1, double p2)
        {
            double[][] d = new double[p1.R][];
            for (int i = 0; i < p1.R; i++)
            {
                d[i] = new double[p1.C];
                for (int j = 0; j < p1.C; j++)
                {
                    d[i][j] = p1.Matrix_data[i][j] - p2;
                }
            }

            return d;
        }
        public static double[][] operator *(Mat p1, Mat p2)
        {
            double[][] d = new double[p1.R][];

            if (p1.C == p2.C && p1.R == p2.R)
            {
                for (int i = 0; i < p1.R; i++)
                {
                    d[i] = new double[p1.C];
                    for (int j = 0; j < p1.C; j++)
                    {
                        d[i][j] = p1.Matrix_data[i][j] * p2.Matrix_data[i][j];
                    }
                }
            }
            else
            {
                for (int k = 0; k < p1.R; k++)
                {
                    d[k] = new double[2] { 0, 0 };
                }
            }

            return d;
        }
        public static double[][] operator *(double[][] p1, Mat p2)
        {
            double[][] d = new double[p1.Length][];

            if (p1[0].Length == p2.C && p1.Length == p2.R)
            {
                for (int i = 0; i < p1.Length; i++)
                {
                    d[i] = new double[p1[0].Length];
                    for (int j = 0; j < p1[0].Length; j++)
                    {
                        d[i][j] = p1[i][j] * p2.Matrix_data[i][j];
                    }
                }
            }
            else
            {
                for (int k = 0; k < p1.Length; k++)
                {
                    d[k] = new double[2] { 0, 0 };
                }
            }

            return d;
        }
        public static double[][] operator *(Mat p1, double[][] p2)
        {
            double[][] d = new double[p1.R][];

            if (p1.C == p2[0].Length && p1.R == p2.Length)
            {
                for (int i = 0; i < p1.R; i++)
                {
                    d[i] = new double[p1.C];
                    for (int j = 0; j < p1.C; j++)
                    {
                        d[i][j] = p1.Matrix_data[i][j] * p2[i][j];
                    }
                }
            }
            else
            {
                for (int k = 0; k < p1.R; k++)
                {
                    d[k] = new double[2] { 0, 0 };
                }
            }

            return d;
        }

        public static double[][] operator *(double p1, Mat p2)
        {
            double[][] d = new double[p2.R][];
            for (int i = 0; i < p2.R; i++)
            {
                d[i] = new double[p2.C];
                for (int j = 0; j < p2.C; j++)
                {
                    d[i][j] = p1 * p2.Matrix_data[i][j];
                }
            }

            return d;
        }
        public static double[][] operator *(Mat p1, double p2)
        {
            double[][] d = new double[p1.R][];
            for (int i = 0; i < p1.R; i++)
            {
                d[i] = new double[p1.C];
                for (int j = 0; j < p1.C; j++)
                {
                    d[i][j] = p1.Matrix_data[i][j] * p2;
                }
            }

            return d;
        }
        public static double[][] operator /(Mat p1, Mat p2)
        {
            double[][] d = new double[p1.R][];

            if (p1.C == p2.C && p1.R == p2.R)
            {
                for (int i = 0; i < p1.R; i++)
                {
                    d[i] = new double[p1.C];
                    for (int j = 0; j < p1.C; j++)
                    {
                        d[i][j] = p1.Matrix_data[i][j] / p2.Matrix_data[i][j];
                    }
                }
            }
            else
            {
                for (int k = 0; k < p1.R; k++)
                {
                    d[k] = new double[2] { 0, 0 };
                }
            }

            return d;
        }
        public static double[][] operator /(double[][] p1, Mat p2)
        {
            double[][] d = new double[p1.Length][];

            if (p1[0].Length == p2.C && p1.Length == p2.R)
            {
                for (int i = 0; i < p1.Length; i++)
                {
                    d[i] = new double[p1[0].Length];
                    for (int j = 0; j < p1[0].Length; j++)
                    {
                        d[i][j] = p1[i][j] / p2.Matrix_data[i][j];
                    }
                }
            }
            else
            {
                for (int k = 0; k < p1.Length; k++)
                {
                    d[k] = new double[2] { 0, 0 };
                }
            }

            return d;
        }
        public static double[][] operator /(Mat p1, double[][] p2)
        {
            double[][] d = new double[p1.R][];

            if (p1.C == p2[0].Length && p1.R == p2.Length)
            {
                for (int i = 0; i < p1.R; i++)
                {
                    d[i] = new double[p1.C];
                    for (int j = 0; j < p1.C; j++)
                    {
                        d[i][j] = p1.Matrix_data[i][j] / p2[i][j];
                    }
                }
            }
            else
            {
                for (int k = 0; k < p1.R; k++)
                {
                    d[k] = new double[2] { 0, 0 };
                }
            }

            return d;
        }

        public static double[][] operator /(double p1, Mat p2)
        {
            double[][] d = new double[p2.R][];
            for (int i = 0; i < p2.R; i++)
            {
                d[i] = new double[p2.C];
                for (int j = 0; j < p2.C; j++)
                {
                    d[i][j] = p1 / p2.Matrix_data[i][j];
                }
            }

            return d;
        }
        public static double[][] operator /(Mat p1, double p2)
        {
            double[][] d = new double[p1.R][];
            for (int i = 0; i < p1.R; i++)
            {
                d[i] = new double[p1.C];
                for (int j = 0; j < p1.C; j++)
                {
                    d[i][j] = p1.Matrix_data[i][j] / p2;
                }
            }

            return d;
        }
        public static double[][] dot(Mat p1, Mat p2)
        {
            double[][] d = new double[p1.R][];
            double temp = 0;

            if (p1.C == p2.R)
            {
                for (int i = 0; i < p1.R; i++)
                {
                    d[i] = new double[p2.C];
                    for (int j = 0; j < p2.C; j++)
                    {
                        for(int a = 0; a < p1.C; a++)
                        {
                            temp = temp + p1.Matrix_data[i][a] * p2.Matrix_data[a][j];
                        }
                        d[i][j] = temp;
                        temp = 0.0;
                    }
                }
            }
            else
            {
                for (int k = 0; k < p1.R; k++)
                {
                    d[k] = new double[2] { 0, 0 };
                }
            }

            return d;
        }
    }
}

Puisqu'il est rassemblé dans une classe, il peut également être utilisé comme bibliothèque.

Programme d'apprentissage profond C

`main.cs`


namespace Matrix
{
    class Program
    {
        static Mat X = new Mat(
                new double[] { 1.0, 0.5 }
                ),
            W1 = new Mat(
                new double[] { 0.0, 0.0, 0.0 },
                new double[] { 0.0, 0.0, 0.0 }
                ),
            B1 = new Mat(
                new double[] { 0.0, 0.0, 0.0 }
                ),
            W2 = new Mat(
                new double[] { 0.0, 0.0 },
                new double[] { 0.0, 0.0 },
                new double[] { 0.0, 0.0 }
                ),
            B2 = new Mat(
                new double[] { 0.0, 0.0 }
                ),
            W3 = new Mat(
                new double[] { 0.0, 0.0 },
                new double[] { 0.0, 0.0 }
                ),
            B3 = new Mat(
                new double[] { 0.0, 0.0 }
                ),
            T = new Mat(
                new double[] { 0, 1 }
                );

        static double loss()
        {
            Mat A1 = new Mat(
                new double[] { 0.0, 0.0, 0.0 }
                ),
                Z1 = new Mat(
                new double[] { 0.0, 0.0, 0.0 }
                ),
                A2 = new Mat(
                new double[] { 0.0, 0.0 }
                ),
                Z2 = new Mat(
                new double[] { 0.0, 0.0 }
                ),
                A3 = new Mat(
                new double[] { 0.0, 0.0 }
                ),
                Y = new Mat(
                new double[] { 0.0, 0.0 }
                );

            double[][] eeeeee = Mat.dot(X, W1);

            A1.Matrix_data = Mat.dot(X, W1) + B1;
            Z1.Matrix_data = A1.sigmoid();
            A2.Matrix_data = Mat.dot(Z1, W2) + B2;
            Z2.Matrix_data = A2.sigmoid();
            A3.Matrix_data = Mat.dot(Z2, W3) + B3;
            Y.Matrix_data = A3.softmax();

            return Y.cross_etp_err(T);
        }
        static void Main(string[] args)
        {
            double rate = 0.1;

            Mat W1_grad = new Mat(W1.Zero_matrix),
                B1_grad = new Mat(B1.Zero_matrix),
                W2_grad = new Mat(W2.Zero_matrix),
                B2_grad = new Mat(B2.Zero_matrix),
                W3_grad = new Mat(W3.Zero_matrix),
                B3_grad = new Mat(B3.Zero_matrix);
            for (int i = 0; i < 100; i++)
            {
                W1_grad.Matrix_data = W1.numerical_gradient(loss);
                B1_grad.Matrix_data = B1.numerical_gradient(loss);
                W2_grad.Matrix_data = W2.numerical_gradient(loss);
                B2_grad.Matrix_data = B2.numerical_gradient(loss);
                W3_grad.Matrix_data = W3.numerical_gradient(loss);
                B3_grad.Matrix_data = B3.numerical_gradient(loss);

                W1.Matrix_data = W1 - (rate * W1_grad);
                B1.Matrix_data = B1 - (rate * B1_grad);
                W2.Matrix_data = W2 - (rate * W2_grad);
                B2.Matrix_data = B2 - (rate * B2_grad);
                W3.Matrix_data = W3 - (rate * W3_grad);
                B3.Matrix_data = B3 - (rate * B3_grad);

                System.Console.WriteLine(i.ToString() + ":" + B3.Matrix_data[0][0].ToString());
            }
        }
    }
}

Les arguments dans le constructeur de la classe Mat prennent le type params double [] ʻou double [] []`. Vous pouvez ajouter, soustraire, multiplier et diviser entre "Mat", des nombres réels et "double [] []". De plus, il est possible de calculer la fonction softmax, la fonction sigmoïde, le calcul du gradient, l'erreur d'entropie de tolérance, etc.

Imiter Numpy de Python en C #