First Pattern Recognition (Hajipata) is a book many people read as an introduction to machine learning. I was one of them: I thought I had understood it after one reading, but when I read it again I realized I still did not understand it at all [^1], so I decided to reread it while implementing it. This article collects Python code that reproduces the execution examples in the book (the code was run with numpy 1.16.2 and pandas 0.24.2). This time it is Chapter 4, "Probability Models and Discriminant Functions".
Standardization subtracts the mean from each feature and divides by its standard deviation, so that every feature has zero mean and unit variance. Below is the code.
--Loading the required libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style('whitegrid')
--Reading iris data
from sklearn.datasets import load_iris
iris = load_iris()
data = pd.DataFrame(iris['data'], columns=iris.feature_names)
target = pd.Series(iris['target']).map({0: 'setosa', 1:'versicolor', 2:'virginica'})
--Mean and covariance matrix of iris petal length and width
print('mu = ', np.mean(data[['petal length (cm)', 'petal width (cm)']]))
print('Sigma = ', np.cov(data[['petal length (cm)', 'petal width (cm)']], rowvar=0))
#output=>
# mu =
# petal length (cm) 3.758000
# petal width (cm) 1.199333
# dtype: float64
# Sigma =
# [[3.11627785 1.2956094 ]
# [1.2956094 0.58100626]]
--Scatter plot before standardization --Fig. 4.2 (a)
for t in target.unique():
    data_tmp = data[target==t]
    plt.scatter(data_tmp['petal length (cm)'], data_tmp['petal width (cm)'], label=t)
plt.legend()
plt.xlabel('petal length (cm)')
plt.ylabel('petal width (cm)')
plt.xlim([1, 7])
plt.ylim([-1, 4])
plt.show()
--Scatter plot after standardization --Fig. 4.2 (b)
def standardize(X):
    return (X - np.mean(X)) / np.std(X)
std_data = standardize(data[['petal length (cm)', 'petal width (cm)']])
for t in target.unique():
    data_tmp = std_data[target==t]
    plt.scatter(data_tmp['petal length (cm)'], data_tmp['petal width (cm)'], label=t)
plt.legend()
plt.xlabel('petal length (cm)')
plt.ylabel('petal width (cm)')
plt.xlim([-2, 2])
plt.ylim([-2, 2])
plt.show()
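As a quick sanity check (not in the book), the standardized columns should have mean roughly 0 and standard deviation roughly 1:
print(std_data.mean())       # both columns should be approximately 0
print(std_data.std(ddof=0))  # both columns should be approximately 1 (ddof=0 matches np.std's default)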
Decorrelation rotates the data by the matrix of eigenvectors of its covariance matrix, which removes the correlation between the features. Below is the code.
--Scatter plot after decorrelation --Fig. 4.4 (b)
def no_correlation(X):
    cov_mat = np.cov(X, rowvar=0)
    eig_vals, eig_vecs = np.linalg.eig(cov_mat)
    return np.dot(X, eig_vecs)
no_corr_data = no_correlation(data[['petal length (cm)', 'petal width (cm)']])
for t in target.unique():
    data_tmp = no_corr_data[target==t]
    plt.scatter(data_tmp[:, 0], data_tmp[:, 1], label=t)
plt.legend()
plt.xlabel('petal length (cm)')
plt.ylabel('petal width (cm)')
plt.xlim([0, 8])
plt.ylim([-3, 3])
plt.show()
In this execution example, the sign of the y component of the eigenvector is reversed from the one given in the book, so the y-axis direction of the scatter plot is also reversed.
Sigma = np.cov(data[['petal length (cm)', 'petal width (cm)']], rowvar=0)
Lambda, S = np.linalg.eig(Sigma)
print('Lambda = \n', Lambda)
print('S = \n', S)
#output=>
# Lambda =
# [3.66123805 0.03604607]
# S =
# [[ 0.92177769 -0.38771882]
# [ 0.38771882 0.92177769]]
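If you want the scatter plot to match the book's orientation, one option (a minimal sketch, not in the book, using a hypothetical no_correlation_book variant) is to flip the sign of the second eigenvector before projecting. Eigenvectors are only determined up to sign, so this does not change the decorrelation itself.
def no_correlation_book(X):
    cov_mat = np.cov(X, rowvar=0)
    eig_vals, eig_vecs = np.linalg.eig(cov_mat)
    # flip the sign of the second eigenvector so the plotted y-axis matches the book
    eig_vecs[:, 1] *= -1
    return np.dot(X, eig_vecs)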
--Covariance matrix after decorrelation
print('Sigma = \n', np.cov(no_corr_data, rowvar=0))
#output=>
# Sigma =
# [[3.66123805e+00 6.01365790e-16]
# [6.01365790e-16 3.60460707e-02]]
Whitening centers the data, decorrelates it, and normalizes the standard deviation along each axis to 1. Below is the code.
--Scatter plot after whitening --Fig. 4.6 (b)
def whitening(X):
    cov_mat = np.cov(X, rowvar=0)
    eig_vals, eig_vecs = np.linalg.eig(cov_mat)
    diag_sqrt_eig_vals = np.diag(np.sqrt(eig_vals))
    return np.dot(np.dot(X-np.mean(X, axis=0), eig_vecs), np.linalg.inv(diag_sqrt_eig_vals.T))
whitened_data = whitening(data[['petal length (cm)', 'petal width (cm)']])
for t in target.unique():
    data_tmp = whitened_data[target==t]
    plt.scatter(data_tmp[:, 0], data_tmp[:, 1], label=t)
plt.legend()
plt.xlabel('petal length (cm)')
plt.ylabel('petal width (cm)')
plt.xlim([-3, 3])
plt.ylim([-3, 4])
plt.show()
--Covariance matrix after whitening
print('Sigma = \n', np.cov(whitened_data, rowvar=0))
#output=>
# Sigma =
# [[1.00000000e+00 1.64411249e-15]
# [1.64411249e-15 1.00000000e+00]]
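As a cross-check (not in the book), scikit-learn's PCA with whiten=True performs an equivalent transformation; its covariance matrix should also be close to the identity, and the coordinates should agree with whitened_data up to the sign of each axis.
from sklearn.decomposition import PCA
pca = PCA(n_components=2, whiten=True)
pca_whitened = pca.fit_transform(data[['petal length (cm)', 'petal width (cm)']])
print(np.cov(pca_whitened, rowvar=0))  # expected to be approximately the identity matrix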
Next, we build discriminant functions that assign a class under the assumption that the class-conditional probabilities follow normal distributions. Below is the code.
--Reading Pima Indian data
The data is borrowed from R datasets.
pima_train = pd.read_csv('https://raw.githubusercontent.com/vincentarelbundock/Rdatasets/master/csv/MASS/Pima.tr.csv')
pima_test = pd.read_csv('https://raw.githubusercontent.com/vincentarelbundock/Rdatasets/master/csv/MASS/Pima.te.csv')
--Scatter plot of training data --Figure 4.8
for t in ['Yes', 'No']:
    pima_tmp = pima_train[pima_train['type']==t]
    plt.scatter(pima_tmp['glu'], pima_tmp['bmi'], label=t)
plt.legend()
plt.xlabel('glu')
plt.ylabel('bmi')
plt.xlim([50, 200])
plt.ylim([15, 50])
plt.show()
--Implementation of the quadratic discriminant function
class QuadraticDiscriminantFunction:
    def __init__(self, threshold=0):
        self._S = None
        self._c = None
        self._F = None
        self._threshold = threshold
    def fit(self, X, Y):
        X_pos = X[Y==1]
        X_neg = X[Y==0]
        mu_pos = np.mean(X_pos, axis=0)
        mu_neg = np.mean(X_neg, axis=0)
        Sigma_pos = np.cov(X_pos, rowvar=0)
        Sigma_neg = np.cov(X_neg, rowvar=0)
        p_pos = len(Y[Y==1])/len(Y)
        p_neg = len(Y[Y==0])/len(Y)
        # coefficients of the quadratic discriminant f(x) = x^T S x + 2 c^T x + F
        self._S = np.linalg.inv(Sigma_pos) - np.linalg.inv(Sigma_neg)
        self._c = np.dot(mu_neg, np.linalg.inv(Sigma_neg)) - np.dot(mu_pos, np.linalg.inv(Sigma_pos))
        self._F = np.dot(np.dot(mu_pos, np.linalg.inv(Sigma_pos)), mu_pos) - np.dot(np.dot(mu_neg, np.linalg.inv(Sigma_neg)), mu_neg)\
                  + np.log(np.linalg.det(Sigma_pos)/np.linalg.det(Sigma_neg))\
                  - 2*np.log(p_pos/p_neg)
    def predict(self, X):
        xSx = np.diag(np.dot(np.dot(X, self._S), X.T))
        cx = np.dot(self._c, X.T)
        Y_hat = xSx + 2*cx + self._F
        return np.where(Y_hat < self._threshold, 1, 0)
--Implementation of the linear discriminant function
class LinearDiscriminantFunction:
    def __init__(self, threshold=0):
        self._c = None
        self._F = None
        self._threshold = threshold
    def fit(self, X, Y):
        X_pos = X[Y==1]
        X_neg = X[Y==0]
        mu_pos = np.mean(X_pos, axis=0)
        mu_neg = np.mean(X_neg, axis=0)
        Sigma_pos = np.cov(X_pos, rowvar=0)
        Sigma_neg = np.cov(X_neg, rowvar=0)
        p_pos = len(Y[Y==1])/len(Y)
        p_neg = len(Y[Y==0])/len(Y)
        # pooled covariance matrix shared by both classes (weighted by the priors)
        Sigma_pool = p_pos*Sigma_pos + p_neg*Sigma_neg
        self._c = np.dot(mu_neg, np.linalg.inv(Sigma_pool)) - np.dot(mu_pos, np.linalg.inv(Sigma_pool))
        self._F = np.dot(np.dot(mu_pos, np.linalg.inv(Sigma_pool)), mu_pos) - np.dot(np.dot(mu_neg, np.linalg.inv(Sigma_pool)), mu_neg)\
                  - 2*np.log(p_pos/p_neg)
    def predict(self, X):
        cx = np.dot(self._c, X.T)
        Y_hat = 2*cx + self._F
        return np.where(Y_hat < self._threshold, 1, 0)
--Training and prediction for drawing the Bayes decision boundaries
X_train = pima_train[['glu', 'bmi']].values
Y_train = pima_train['type'].map({'Yes':1, 'No':0})
qdf = QuadraticDiscriminantFunction(threshold=0)
qdf.fit(X_train, Y_train)
ldf = LinearDiscriminantFunction(threshold=0)
ldf.fit(X_train, Y_train)
X1_min, X1_max = np.min(X_train[:, 0])-1, np.max(X_train[:, 0])+1
X2_min, X2_max = np.min(X_train[:, 1])-1, np.max(X_train[:, 1])+1
X1, X2 = np.meshgrid(np.arange(X1_min, X1_max, 1), np.arange(X2_min, X2_max, 0.5))
X = np.c_[X1.ravel(), X2.ravel()]
Y_qdf_pred = qdf.predict(X)
Y_ldf_pred = ldf.predict(X)
--Bayes decision boundary obtained from the quadratic discriminant function --Fig. 4.9 (a)
plt.contour(X1, X2, Y_qdf_pred.reshape(X1.shape), colors='limegreen')
for t in ['Yes', 'No']:
    pima_tmp = pima_train[pima_train['type']==t]
    plt.scatter(pima_tmp['glu'], pima_tmp['bmi'], label=t)
plt.legend()
plt.xlabel('glu')
plt.ylabel('bmi')
plt.xlim([50, 200])
plt.ylim([15, 50])
plt.show()
--Error rate of the quadratic discriminant function
X_test = pima_test[['glu', 'bmi']].values
Y_test = pima_test['type'].map({'Yes':1, 'No':0})
Y_train_qdf_pred = qdf.predict(X_train)
print('error rate (train) = {:.1f}%'.format(np.mean(Y_train_qdf_pred != Y_train)*100))
Y_test_qdf_pred = qdf.predict(X_test)
print('error rate (test) = {:.1f}%'.format(np.mean(Y_test_qdf_pred != Y_test)*100))
#output=>
# error rate (train) = 24.5%
# error rate (test) = 23.8%
--Bayes decision boundary obtained from the linear discriminant function --Fig. 4.9 (b)
plt.contour(X1, X2, Y_ldf_pred.reshape(X1.shape), colors='limegreen')
for t in ['Yes', 'No']:
    pima_tmp = pima_train[pima_train['type']==t]
    plt.scatter(pima_tmp['glu'], pima_tmp['bmi'], label=t)
plt.legend()
plt.xlabel('glu')
plt.ylabel('bmi')
plt.xlim([50, 200])
plt.ylim([15, 50])
plt.show()
--Error rate of the linear discriminant function
Y_train_ldf_pred = ldf.predict(X_train)
print('error rate (train) = {:.1f}%'.format(np.mean(Y_train_ldf_pred != Y_train)*100))
Y_test_ldf_pred = ldf.predict(X_test)
print('error rate (test) = {:.1f}%'.format(np.mean(Y_test_ldf_pred != Y_test)*100))
#output=>
# error rate (train) = 24.0%
# error rate (test) = 22.0%
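As a rough cross-check of both classifiers (not in the book), scikit-learn's discriminant analysis classes implement the same normal-distribution-based models; their test error rates should be in the same ballpark as the figures above, though not necessarily identical, since details such as how the pooled covariance is estimated differ.
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis, LinearDiscriminantAnalysis
for name, clf in [('QDA', QuadraticDiscriminantAnalysis()), ('LDA', LinearDiscriminantAnalysis())]:
    clf.fit(X_train, Y_train)
    print('{} error rate (test) = {:.1f}%'.format(name, np.mean(clf.predict(X_test) != Y_test)*100))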
--ROC curves of the quadratic and linear discriminant functions --Fig. 4.10
qdf_tpr_list = []
qdf_fpr_list = []
ldf_tpr_list = []
ldf_fpr_list = []
# here class 0 ('No', non-diabetic) is treated as the positive class
for th in np.arange(-9, 7, 0.1):
    qdf = QuadraticDiscriminantFunction(threshold=th)
    qdf.fit(X_train, Y_train)
    Y_qdf_pred = qdf.predict(X_test)
    tpr_qdf = len(Y_test[(Y_test==0)&(Y_qdf_pred==0)]) / len(Y_test[Y_test==0])
    qdf_tpr_list.append(tpr_qdf)
    fpr_qdf = len(Y_test[(Y_test==1)&(Y_qdf_pred==0)]) / len(Y_test[Y_test==1])
    qdf_fpr_list.append(fpr_qdf)
    ldf = LinearDiscriminantFunction(threshold=th)
    ldf.fit(X_train, Y_train)
    Y_ldf_pred = ldf.predict(X_test)
    tpr_ldf = len(Y_test[(Y_test==0)&(Y_ldf_pred==0)]) / len(Y_test[Y_test==0])
    ldf_tpr_list.append(tpr_ldf)
    fpr_ldf = len(Y_test[(Y_test==1)&(Y_ldf_pred==0)]) / len(Y_test[Y_test==1])
    ldf_fpr_list.append(fpr_ldf)
plt.plot(qdf_fpr_list, qdf_tpr_list, label='Quadratic')
plt.plot(ldf_fpr_list, ldf_tpr_list, label='Linear')
plt.legend()
plt.xlabel('false positive ratio')
plt.ylabel('true positive ratio')
plt.show()
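The area under each ROC curve can also be estimated from the collected points with the trapezoidal rule (a sketch, not in the book, using a hypothetical auc_from_points helper); the points are sorted by false positive rate first, since the loop traverses thresholds rather than sorted rates.
def auc_from_points(fpr_list, tpr_list):
    # sort by false positive rate, then integrate with the trapezoidal rule
    order = np.argsort(fpr_list)
    return np.trapz(np.array(tpr_list)[order], np.array(fpr_list)[order])
print('AUC (quadratic) = {:.3f}'.format(auc_from_points(qdf_fpr_list, qdf_tpr_list)))
print('AUC (linear) = {:.3f}'.format(auc_from_points(ldf_fpr_list, ldf_tpr_list)))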
I have reproduced the execution examples of Chapter 4 of Hajipata in Python. As for the correctness of the code, I only checked that the drawn figures roughly match those in the book, so if you find any mistakes, I would appreciate an edit request.