The influence function proposed in "Understanding Black-box Predictions via Influence Functions", the ICML 2017 best paper, is a formulation of the perturbation that a training sample exerts on a machine learning model.
For an overview and commentary on the paper, please refer to here and here.
In this article, we will do some experiments to understand the behavior of the influence function.
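As a reference point for the experiments, the central quantity of the paper is the influence of upweighting a training point z on the loss at a test point z_test, evaluated at the optimum θ̂ (restating the paper's formula):

\mathcal{I}_{\mathrm{up,loss}}(z, z_{\mathrm{test}}) = -\nabla_\theta L(z_{\mathrm{test}}, \hat\theta)^\top H_{\hat\theta}^{-1} \nabla_\theta L(z, \hat\theta), \qquad H_{\hat\theta} = \frac{1}{n}\sum_{i=1}^{n} \nabla_\theta^2 L(z_i, \hat\theta)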
For the implementation of logistic regression, refer to here.
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import load_iris
#iris dataset
iris = load_iris()
#Use only the sepal length and width of setosa and versicolor
X = iris.data[iris.target != 2][:, :2]
Y = iris.target[iris.target != 2]
#Standardization
X = StandardScaler().fit_transform(X)
#Scatter plot
plt.scatter(X[:,0][Y == 0], X[:,1][Y == 0], label=iris.target_names[0], marker='o')
plt.scatter(X[:,0][Y == 1], X[:,1][Y == 1], label=iris.target_names[1], marker='x')
plt.legend()
plt.show()
def sigmoid(x):
    """Sigmoid function"""
    return 1 / (1 + np.exp(-x))
def logistic_regression(X, Y):
    """Logistic regression trained by gradient ascent on the log-likelihood"""
    ETA = 1e-3  # Learning rate
    epochs = 5000  # Number of updates
    # Add a column of ones for the bias term
    X = np.hstack([np.ones([X.shape[0], 1]), X])
    # Parameter initialization
    theta = np.random.rand(3)
    print('Parameters before update θ')
    print(theta)
    # Parameter updates
    for _ in range(epochs):
        theta = theta + ETA * np.dot(Y - sigmoid(np.dot(X, theta)), X)
    print('Updated parameters θ')
    print(theta)
    print('Decision boundary')
    print('y = {:0.3f} + {:0.3f} * x1 + {:0.3f} * x2'.format(theta[0], theta[1], theta[2]))
    return theta
def decision_boundary(xline, theta):
    """Decision boundary"""
    return -(theta[0] + theta[1] * xline) / theta[2]
theta = logistic_regression(X,Y)
#Plot of sample data
plt.plot(X[Y==0, 0],X[Y==0,1],'o', label=iris.target_names[0])
plt.plot(X[Y==1, 0],X[Y==1,1],'x', label=iris.target_names[1])
xline = np.linspace(np.min(X[:,0]),np.max(X[:,0]),100)
#Plot of decision boundaries
plt.plot(xline, decision_boundary(xline, theta), label='decision boundary')
plt.legend()
plt.show()
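As a quick derivation: the decision boundary drawn above is where the predicted probability equals 0.5, that is, where the linear score is zero; solving for x2 gives exactly the line that decision_boundary returns:

\theta_0 + \theta_1 x_1 + \theta_2 x_2 = 0 \quad\Longrightarrow\quad x_2 = -\frac{\theta_0 + \theta_1 x_1}{\theta_2}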
# The influence function assumes y ∈ {1, -1}, so convert Y accordingly
Y1 = np.copy(Y)
Y1[Y1 == 0] = -1
# Add a column of ones for the bias term
X1 = np.hstack([np.ones([X.shape[0], 1]), X])
def get_influence(x, y, theta):
    """Influence value of training sample (x, y) on the total training loss"""
    # Hessian of the mean log loss: H = (1/n) Σ σ(θᵀxi) σ(-θᵀxi) xi xiᵀ
    H = (1 / X1.shape[0]) * np.sum(
        np.array([sigmoid(np.dot(xi, theta)) * sigmoid(-np.dot(xi, theta))
                  * np.outer(xi, xi) for xi in X1]), axis=0)
    # Σ_j ∇L(z_j)ᵀ H⁻¹ ∇L(z) over all samples z_j, with ∇L(z) = -y σ(-y θᵀx) x
    return -y * sigmoid(-y * np.dot(theta, x)) * np.dot(
        -Y1 * sigmoid(-Y1 * np.dot(X1, theta)),
        np.dot(x, np.dot(np.linalg.inv(H), X1.T)))
# List of influence values for each sample
influence_list = [get_influence(x, y, theta) for x, y in zip(X1, Y1)]
# Plot including influence values
plt.figure(figsize=(12, 8))
plt.plot(X[Y == 0, 0], X[Y == 0, 1], 'o')
plt.plot(X[Y == 1, 0], X[Y == 1, 1], 'x')
plt.plot(xline, decision_boundary(xline, theta), label='decision boundary')
# Annotate each sample with its influence value
for x, influence in zip(X, influence_list):
    plt.annotate(f"{influence:.1f}", xy=(x[0], x[1]), size=10)
plt.legend()
plt.show()
The figure below shows the influence values of the individual samples plotted together.
It can be seen that the closer the sample is to the decision boundary, the higher the influence value.
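For reference, get_influence evaluates the paper's formula with the logistic loss L(z) = log(1 + exp(-y θᵀx)) for y ∈ {1, -1}, whose gradient and Hessian are

\nabla_\theta L(z) = -y\,\sigma(-y\,\theta^\top x)\,x, \qquad H = \frac{1}{n}\sum_{i=1}^{n} \sigma(\theta^\top x_i)\,\sigma(-\theta^\top x_i)\,x_i x_i^\top

and the value plotted for each sample is the influence summed over all training points treated as test points.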
Mislabeled samples, that is, samples with incorrect labels, can cause the model to learn incorrectly or prevent proper evaluation.
Here, we intentionally mix in some mislabels and investigate what happens to the influence values.
#Intentionally replace some labels
Y[76] = 0
Y[22] = 1
#Scatter plot
plt.scatter(X[:,0][Y == 0], X[:,1][Y == 0], label=iris.target_names[0], marker='o')
plt.scatter(X[:,0][Y == 1], X[:,1][Y == 1], label=iris.target_names[1], marker='x')
plt.legend()
plt.show()
theta = logistic_regression(X,Y)
# The influence function assumes y ∈ {1, -1}, so convert Y accordingly
Y1 = np.copy(Y)
Y1[Y1 == 0] = -1
# Add a column of ones for the bias term
X1 = np.hstack([np.ones([X.shape[0], 1]), X])
# Reuse get_influence defined above (it reads the updated X1 and Y1)
# List of influence values for each sample
influence_list = [get_influence(x, y, theta) for x, y in zip(X1, Y1)]
# Plot including influence values
plt.figure(figsize=(12, 8))
plt.plot(X[Y == 0, 0], X[Y == 0, 1], 'o')
plt.plot(X[Y == 1, 0], X[Y == 1, 1], 'x')
plt.plot(xline, decision_boundary(xline, theta), label='decision boundary')
# Annotate each sample with its influence value
for x, influence in zip(X, influence_list):
    plt.annotate(f"{influence:.1f}", xy=(x[0], x[1]), size=10)
plt.legend()
plt.show()
You can see that the influence values of the mislabeled samples are very high.
In addition, the mislabeled sample in the lower left now lies beyond the decision boundary, raising the concern that mislabeling adversely affects learning.
By using the influence function, we can find both samples close to the decision boundary (where the model is not confident) and mislabeled samples.
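As a minimal sketch of how this could be used in practice (not part of the original code; the top-5 cutoff is an arbitrary choice for illustration), one could rank the samples by influence value to surface mislabel candidates for manual inspection:

# Hypothetical usage sketch: list the most influential samples,
# which are candidates for label checking (top-5 cutoff is arbitrary)
top = np.argsort(influence_list)[::-1][:5]
for i in top:
    print(f"sample {i}: influence = {influence_list[i]:.1f}, label = {Y[i]}")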