I recently revisited the "likelihood" of elementary statistics. Let's observe how the joint probability of data under a normal distribution changes as we vary the distribution's parameters.
As is well known, the normal distribution is expressed as follows. For clarity, the left-hand side is written as $P(x)$.

\begin{eqnarray}
P(x) = {1 \over \sqrt{2\pi\sigma^{2}}} \exp \left(-{1 \over 2}{(x-\mu)^2 \over \sigma^2} \right)
\end{eqnarray}
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy.random as rd
import matplotlib.gridspec as gridspec
%matplotlib inline
plt.rcParams['font.size']=15
def plt_legend_out():
    plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0)
m = 10    # mean
s = 3     # standard deviation
min_x = m - 4*s
max_x = m + 4*s
x = np.linspace(min_x, max_x, 201)
y = (1/np.sqrt(2*np.pi*s**2))*np.exp(-0.5*(x-m)**2/s**2)   # normal density
plt.xlim(min_x, max_x)
plt.ylim(0, max(y)*1.1)
plt.plot(x, y)
plt.show()
Let's randomly draw 10 data points from this normal distribution.
plt.figure(figsize=(8,1))
rd.seed(7)
data = rd.normal(10, 3, 10)   # 10 samples from N(10, 3^2)
plt.scatter(data, np.zeros_like(data), c="r", s=50)
plt.tick_params(left=False,labelleft=False)
plt.axhline(y=0,color='gray',lw=0.5)
plt.show()
The probability that all of the above data points occur simultaneously (their joint probability) is as follows.
\begin{eqnarray}
P(x_1, x_2,\cdots,x_{10}) &=& P(x_1)P(x_2)\cdots P(x_{10})\\
&=& \prod_{i=1}^{10}P(x_i)
\end{eqnarray}
However, this expression is problematic to compute. The product of the probabilities becomes a very small value: each factor adds leading zeros after the decimal point, as in $0.01 \times 0.01 = 0.0001$. If this continues 10 or 100 times, the zeros pile up until the value underflows. So, let's take the $\log$.
\begin{eqnarray}
\log{P(x_1,x_2,\cdots,x_{10})} &=& \log{\left(P(x_1)\times P(x_2)\times\cdots\times P(x_{10})\right)}\\
&=& \log{P(x_1)}+\log{P(x_2)}+\cdots+\log{P(x_{10})}\\
&=& \color{red}{\sum_{i=1}^{10}\log{P(x_i)}}
\end{eqnarray}
By taking the $\log$, the product is replaced by a sum. $P(x)$ in this case is the normal distribution, so the log of the joint probability of the data above is as follows.
\begin{eqnarray}
\color{red}{\sum_{i=1}^{10}\log{P(x_i)}} &=& \sum_{i=1}^{10}\log{\left({1 \over \sqrt{2\pi\sigma^{2}}} \exp \left(-{1 \over 2}{(x_i-\mu)^2 \over \sigma^2} \right)\right)}
\end{eqnarray}
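As a quick numerical illustration (my addition, not part of the original article): a direct product of many small probabilities underflows to zero in 64-bit floating point, while the sum of their logs stays a perfectly ordinary number.

p = np.full(400, 0.01)      # 400 probabilities of 0.01 each
print(np.prod(p))           # 0.0: 1e-800 underflows float64 (smallest ~1e-308)
print(np.sum(np.log(p)))    # about -1842.1, no underflow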
Now, let's fix $\sigma=3$ and consider normal distributions with $\mu=5, 10, 15$, plotting the previously drawn data as well. Visually, the condition $\mu=10$, $\sigma=3$ seems to fit best.
def norm_dens(x, m, s):
    # normal density with mean m and standard deviation s
    return (1/np.sqrt(2*np.pi*s**2))*np.exp(-0.5*(x-m)**2/s**2)

def log_likelihood(x, m, s):
    # sum the log densities rather than taking the log of their product,
    # so the computation does not underflow
    return np.sum([np.log(norm_dens(x_i, m, s)) for x_i in x])
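As a quick sanity check of these functions (an added snippet, not the original author's), the log-likelihood of the sample should peak near the true mean:

for m in [5, 10, 15]:
    print(m, log_likelihood(data, m, 3))   # expect the largest value at m = 10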
d_ymin = -0.01 ; d_ymax = 0.2
plt.figure(figsize=(10,2))
plt.subplots_adjust(hspace=0.1, wspace=0.1)
x = np.linspace(0, 20, 100)
s = 3
for i, m in enumerate([5, 10, 15]):
    plt.subplot(1, 3, i+1)
    plt.plot(x, norm_dens(x, m, s))
    plt.scatter(data, np.zeros_like(data), c="r", s=50)
    plt.axhline(y=0, color='gray', lw=0.5)
    plt.xlim(0, 20) ; plt.ylim(d_ymin, d_ymax)
    plt.title(r'$\mu=$' + str(m) + r', $\sigma=$' + str(s))
    if i == 0:
        plt.ylabel('density')
        plt.tick_params(bottom=False, labelbottom=False)
    else:
        plt.tick_params(bottom=False, labelbottom=False, left=False, labelleft=False)
plt.show()
Now, let's also plot $\log{P(x)}$ for each data point. Under the condition $\mu=10$, $\sigma=3$, $\sum{\log{P(x)}}$ is the largest.
logp_ymin = -10 ; logp_ymax = 0
d_ymin = -0.01 ; d_ymax = 0.2
plt.figure(figsize=(10,4))
plt.subplots_adjust(hspace=0.1, wspace=0.1)
x = np.linspace(0, 20, 100)
s = 3
for i, m in enumerate([5, 10, 15]):
    # top row: the density with the data points
    plt.subplot(2, 3, i+1)
    plt.plot(x, norm_dens(x, m, s))
    plt.scatter(data, np.zeros_like(data), c="r", s=50)
    plt.axhline(y=0, color='gray', lw=0.5)
    plt.xlim(0, 20) ; plt.ylim(d_ymin, d_ymax)
    plt.title(r'$\mu=$' + str(m) + r', $\sigma=$' + str(s))
    if i == 0:
        plt.ylabel('density')
        plt.tick_params(bottom=False, labelbottom=False)
    else:
        plt.tick_params(bottom=False, labelbottom=False, left=False, labelleft=False)
    # bottom row: log density of each data point and their sum
    plt.subplot(2, 3, i+4)
    xl = [np.log(norm_dens(x_i, m, s)) for x_i in data]
    plt.scatter(data, xl, color='red')
    for j in range(len(data)):
        plt.plot([data[j], data[j]], [0, xl[j]], color='gray', lw=0.5, ls='dashed')
    plt.xlim(0, 20) ; plt.ylim(logp_ymin, logp_ymax)
    plt.xlabel('x')
    if i == 0:
        plt.ylabel('log p(x)')
    else:
        plt.tick_params(left=False, labelleft=False)
    plt.text(3, logp_ymin+0.5, r'$\sum{\log{p(x)}}$=' + str(np.round(np.sum(xl), 1)))
plt.show()
Next, let's fix $\mu=10$ and look at normal distributions with $\sigma=2, 3, 5$. Although the difference is less striking, $\sum{\log{P(x)}}$ is again largest under $\mu=10$, $\sigma=3$; the $\sigma=5$ distribution looks too wide for the data. In other words, the data and the distribution fit well exactly where $\sum{\log{P(x)}}$ (the joint probability) is large.
logp_ymin = -6 ; logp_ymax = 0
d_ymin = -0.01 ; d_ymax = 0.25
plt.figure(figsize=(10,4))
plt.subplots_adjust(hspace=0.1, wspace=0.1)
x = np.linspace(0, 20, 100)
m = 10
for i, s in enumerate([2, 3, 5]):
    # top row: the density with the data points
    plt.subplot(2, 3, i+1)
    plt.plot(x, norm_dens(x, m, s))
    plt.scatter(data, np.zeros_like(data), c="r", s=50)
    plt.axhline(y=0, color='gray', lw=0.5)
    plt.xlim(0, 20) ; plt.ylim(d_ymin, d_ymax)
    plt.title(r'$\mu=$' + str(m) + r', $\sigma=$' + str(s))
    if i == 0:
        plt.ylabel('density')
        plt.tick_params(bottom=False, labelbottom=False)
    else:
        plt.tick_params(bottom=False, labelbottom=False, left=False, labelleft=False)
    # bottom row: log density of each data point and their sum
    plt.subplot(2, 3, i+4)
    xl = [np.log(norm_dens(x_i, m, s)) for x_i in data]
    plt.scatter(data, xl, color='red')
    for j in range(len(data)):
        plt.plot([data[j], data[j]], [0, xl[j]], color='gray', lw=0.5, ls='dashed')
    plt.xlim(0, 20) ; plt.ylim(logp_ymin, logp_ymax)
    plt.xlabel('x')
    if i == 0:
        plt.ylabel('log p(x)')
    else:
        plt.tick_params(left=False, labelleft=False)
    plt.text(3, logp_ymin+0.5, r'$\sum{\log{p(x)}}$=' + str(np.round(np.sum(xl), 1)))
plt.show()
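The figure's numbers can also be printed directly (again an added snippet, not the original author's):

for s in [2, 3, 5]:
    print(s, log_likelihood(data, 10, s))   # expect the largest value at s = 3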
Finally, let's search for $\mu$ and $\sigma$ in finer detail. The search here is simply an exhaustive grid search. As a result, we can estimate values close to the true ones.
mus = np.linspace(8, 12, 50)
ss = np.linspace(2, 4, 50)
lmu = [] ; ls = [] ; lll = []
for mu in mus:
    for s in ss:
        lmu.append(mu)
        ls.append(s)
        lll.append(log_likelihood(data, mu, s))
plt.scatter(lmu, ls, c=lll, alpha=0.8)
plt.xlabel(r'$\mu$')
plt.ylabel(r'$\sigma$')
plt.colorbar()
plt.scatter(10, 3, color='r')
plt.text(10.1, 3.1, 'true', color='r')
# pick the grid point with the largest log-likelihood
best = int(np.argmax(lll))
pmu, ps = lmu[best], ls[best]
plt.scatter(pmu, ps, color='b')
plt.text(pmu+0.1, ps+0.1, 'predicted', color='b')
plt.title('log likelihood')
plt.show()
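The grid search is for illustration only; for the normal distribution, the maximum of $\sum{\log{P(x_i)}}$ can also be found analytically. Differentiating with respect to $\mu$ and setting the result to zero gives the sample mean (and, similarly for $\sigma$, the sample standard deviation):

\begin{eqnarray}
\frac{\partial}{\partial\mu}\sum_{i=1}^{10}\log{P(x_i)} = \sum_{i=1}^{10}\frac{x_i-\mu}{\sigma^2} = 0
\quad\Rightarrow\quad \hat{\mu} = \frac{1}{10}\sum_{i=1}^{10}x_i
\end{eqnarray}

The following cross-check is my addition, not part of the original article.

mu_hat = np.mean(data)     # closed-form ML estimate of the mean
sigma_hat = np.std(data)   # np.std uses ddof=0 by default, i.e. the ML estimate
print(mu_hat, sigma_hat)   # should be close to the grid-search result above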
This time we knew that "the data came from a normal distribution." In practice, however, we assume a probability distribution behind the data and estimate its parameters. At that point the "joint probability," viewed as a function of the parameters, is called the "likelihood"; what we are doing is exactly the same.