Hello.
For the support vector machine (SVM) calculation, refer to the procedure (using cvxopt) in "Soft Margin SVM" on the blog A breakthrough on artificial intelligence. If you follow it exactly, you get the feeling of having worked out the solution yourself. In [^1] below, the converged Lagrange multipliers alpha, the plots, and so on are also drawn (data points with `class × prediction == 1` are highlighted; `prediction == 0` is the decision boundary).
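A quick reminder (this is the standard textbook formulation, not anything specific to this post): the quantity computed below is the solution of the soft-margin SVM dual problem

$$
\max_{\alpha}\ \sum_{i=1}^{N}\alpha_i - \frac{1}{2}\sum_{i=1}^{N}\sum_{j=1}^{N}\alpha_i\alpha_j\,y_i y_j\,K(x_i, x_j)
\quad \text{s.t.}\quad 0 \le \alpha_i \le C_i,\quad \sum_{i=1}^{N}\alpha_i y_i = 0,
$$

which `cvxopt.solvers.qp` handles in its standard form: minimize $\tfrac{1}{2}x^{T}Px + q^{T}x$ subject to $Gx \le h$ and $Ax = b$. Running the script gives: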
$ ./svm.py
file = classification.txt (len=200)
cvxopt result status: optimal
delta = 5.684342e-14
class = ('-1', '+1')
confusion matrix:
[[96  7]
 [34 63]]
svm.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# support vector machine (SVM) calculation
# uses cvxopt.solvers.qp (quadratic programming)
from __future__ import print_function
import numpy as np
import cvxopt
Ccoeff = [30, 30] # soft-margin coefficient C for each class (class -1, class +1)
SIGMA = 1.054 # for non-linear kernel
kname = 'gaussian'
# Gaussian RBF kernel
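# K(x, y) = exp(-||x - y||^2 / (2 * SIGMA^2)), i.e. the code below uses gamma = 1 / (2 * SIGMA^2)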
def gaussian_kernel(x, y):
    gamma = 1 / (2 * (SIGMA ** 2))
    return np.exp(-norm2(x - y) * gamma)

def kernel(x, y):
    return globals()[kname + '_kernel'](x, y)

def norm2(x):
    return np.dot(x, x)
# find the Lagrange multipliers alpha by quadratic programming
def QP(dat, clas, Ccoeff):
    coeff = np.array([Ccoeff[int(x > 0)] for x in clas])
    N, Z = len(dat), list(zip(clas, dat))
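    # cvxopt.solvers.qp minimizes (1/2) x'Px + q'x  subject to  Gx <= h, Ax = b;
    # here P = Q with Q[i,j] = y_i y_j K(x_i, x_j), q = -1 (vector of -1s),
    # G/h encode 0 <= alpha_i <= C_i, and A/b encode sum_i alpha_i y_i = 0.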
    Q = cvxopt.matrix(np.array([[c * c_ * kernel(d, d_) for c, d in Z] for c_, d_ in Z]))
    p = cvxopt.matrix(-np.ones(N))
    G = cvxopt.matrix(np.vstack((np.diag([-1.0] * N), np.identity(N))))
    h = cvxopt.matrix(np.hstack((np.zeros(N), coeff)))
    A = cvxopt.matrix(clas, (1, N))
    b = cvxopt.matrix(0.0)
    cvxopt.solvers.options['abstol'] = 1e-5
    cvxopt.solvers.options['reltol'] = 1e-10
    cvxopt.solvers.options['show_progress'] = False
    sol = cvxopt.solvers.qp(Q, p, G, h, A, b)
    alpha = np.array(sol['x']).reshape(N)
    print('cvxopt result status: %s' % sol['status'])
    print('delta = %e' % np.dot(alpha, clas))
    return alpha
# find the support vector indices and the bias term
def SV(dat, clas, Ccoeff, alpha):
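    # From the KKT conditions: alpha_i > 0 marks a support vector, and
    # 0 < alpha_i < C_i marks a vector lying on the margin ("edge" vector);
    # the bias is averaged over the margin vectors below.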
    supportVectors = []
    edgeVectors = []
    c = [Ccoeff[int(x > 0)] for x in clas]
    infinitesimal = 1e-3 * min(Ccoeff)
    for i in range(len(alpha)):
        if alpha[i] > infinitesimal:
            supportVectors.append(i)
            if alpha[i] < c[i] - infinitesimal:
                edgeVectors.append(i)
    bias = average([clas[i] - prediction(dat[i], alpha, clas, dat, supportVectors) for i in edgeVectors])
    return supportVectors, edgeVectors, bias

def average(x):
    return sum(x) / len(x)
# predicted value (decision function) for input x
def prediction(x, alpha, clas, dat, supportVectors, bias=0):
    p = [alpha[i] * clas[i] * kernel(x, dat[i]) for i in supportVectors]
    return sum(p) + bias
# confusion matrix (rows: true class, columns: predicted class)
def confusion_matrix(clas, predict):
    mat = np.zeros([2, 2], dtype=int)
    for c, p in zip(clas, predict):
        mat[int(c > 0), int(p > 0)] += 1
    return mat
# supervised (training) data classification.txt (N = 200)
# http://research.microsoft.com/en-us/um/people/cmbishop/prml/webdatasets/classification.txt
def make_data():
    filename = 'classification.txt'
    dat_clas = np.genfromtxt(filename)
    dat, clas = dat_clas[:, 0:2], dat_clas[:, 2]
    clas = clas * 2 - 1.0  # convert the teacher signal from {0, 1} to {-1, +1}
    classname = ('-1', '+1')
    print('file = %s (len=%d)' % (filename, len(dat)))
    return dat, clas, classname
def main():
    dat, clas, classname = make_data()
    # solve the SVM by quadratic programming
    alpha = QP(dat, clas, Ccoeff)
    supportVectors, edgeVectors, bias = SV(dat, clas, Ccoeff, alpha)
    # predicted values and the confusion matrix
    predict = [prediction(x, alpha, clas, dat, supportVectors, bias) for x in dat]
    print('class =', classname, '\n', confusion_matrix(clas, predict))

if __name__ == '__main__':
    main()
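For reference, here is a minimal usage sketch (not part of the original post) showing how the trained model could classify a new point. The query point `[1.0, 0.5]` is made up purely for illustration, and the snippet assumes the script above is saved as `svm.py` in the same directory as `classification.txt`:

```python
# hypothetical usage sketch: train on the same data, then classify one new point
import numpy as np
import svm  # the script above, imported as a module (main() is not run on import)

dat, clas, _ = svm.make_data()
alpha = svm.QP(dat, clas, svm.Ccoeff)
supportVectors, edgeVectors, bias = svm.SV(dat, clas, svm.Ccoeff, alpha)

x_new = np.array([1.0, 0.5])  # made-up query point
f = svm.prediction(x_new, alpha, clas, dat, supportVectors, bias)
print('decision value = %f -> class %s' % (f, '+1' if f > 0 else '-1'))
```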
[^1]: This is the same calculation as in Chapter 7 of PRML, but the plotted result in that book looks a little loose to me, either in the convergence of the solution or in the contour calculation.