The influence function proposed in "Understanding Black-box Predictions via Influence Functions", the ICML 2017 best paper, is a formulation of the perturbation that a training sample exerts on a machine learning model.
For an overview and commentary on the paper, please refer to here and here.
In this article, we will do some experiments to understand the behavior of the influence function.
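As a reference point for the experiments, the central quantity of the paper is the influence of upweighting a training point z on the loss at a test point z_test, evaluated at the optimum θ̂ (restating the paper's formula):

\mathcal{I}_{\mathrm{up,loss}}(z, z_{\mathrm{test}}) = -\nabla_\theta L(z_{\mathrm{test}}, \hat\theta)^\top H_{\hat\theta}^{-1} \nabla_\theta L(z, \hat\theta), \qquad H_{\hat\theta} = \frac{1}{n}\sum_{i=1}^{n} \nabla_\theta^2 L(z_i, \hat\theta)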
For the implementation of logistic regression, refer to here.
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import load_iris
#iris dataset
iris = load_iris()
#Use only the sepal length and width of setosa and versicolor
X = iris.data[iris.target != 2][:, :2]
Y = iris.target[iris.target != 2]
#Standardization
X = StandardScaler().fit_transform(X)
#Scatter plot
plt.scatter(X[:,0][Y == 0], X[:,1][Y == 0], label=iris.target_names[0], marker='o')
plt.scatter(X[:,0][Y == 1], X[:,1][Y == 1], label=iris.target_names[1], marker='x')
plt.legend()
plt.show()
def sigmoid(x):
    """Sigmoid function"""
    return 1 / (1 + np.exp(-x))
def logistic_regression(X, Y):
    """Logistic regression trained by gradient ascent on the log-likelihood"""
    ETA = 1e-3  # Learning rate
    epochs = 5000  # Number of updates
    # Add a column of ones for the bias term
    X = np.hstack([np.ones([X.shape[0], 1]), X])
    # Parameter initialization
    theta = np.random.rand(3)
    print('Parameters before update θ')
    print(theta)
    # Parameter updates
    for _ in range(epochs):
        theta = theta + ETA * np.dot(Y - sigmoid(np.dot(X, theta)), X)
    print('Updated parameters θ')
    print(theta)
    print('Decision boundary')
    print('y = {:0.3f} + {:0.3f} * x1 + {:0.3f} * x2'.format(theta[0], theta[1], theta[2]))
    return theta
def decision_boundary(xline, theta):
    """Decision boundary"""
    return -(theta[0] + theta[1] * xline) / theta[2]
theta = logistic_regression(X,Y)
#Plot of sample data
plt.plot(X[Y==0, 0],X[Y==0,1],'o', label=iris.target_names[0])
plt.plot(X[Y==1, 0],X[Y==1,1],'x', label=iris.target_names[1])
xline = np.linspace(np.min(X[:,0]),np.max(X[:,0]),100)
#Plot of decision boundaries
plt.plot(xline, decision_boundary(xline, theta), label='decision boundary')
plt.legend()
plt.show()
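As a quick derivation: the decision boundary drawn above is where the predicted probability equals 0.5, that is, where the linear score is zero; solving for x2 gives exactly the line that decision_boundary returns:

\theta_0 + \theta_1 x_1 + \theta_2 x_2 = 0 \quad\Longrightarrow\quad x_2 = -\frac{\theta_0 + \theta_1 x_1}{\theta_2}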
# The influence function assumes y ∈ {1, -1}, so convert Y accordingly
Y1 = np.copy(Y)
Y1[Y1 == 0] = -1
# Add a column of ones for the bias term
X1 = np.hstack([np.ones([X.shape[0], 1]), X])
def get_influence(x, y, theta):
    """Influence value of training sample (x, y) on the total training loss"""
    # Hessian of the mean log loss: H = (1/n) Σ σ(θᵀxi) σ(-θᵀxi) xi xiᵀ
    H = (1 / X1.shape[0]) * np.sum(
        np.array([sigmoid(np.dot(xi, theta)) * sigmoid(-np.dot(xi, theta))
                  * np.outer(xi, xi) for xi in X1]), axis=0)
    # Σ_j ∇L(z_j)ᵀ H⁻¹ ∇L(z) over all samples z_j, with ∇L(z) = -y σ(-y θᵀx) x
    return -y * sigmoid(-y * np.dot(theta, x)) * np.dot(
        -Y1 * sigmoid(-Y1 * np.dot(X1, theta)),
        np.dot(x, np.dot(np.linalg.inv(H), X1.T)))
# List of influence values for each sample
influence_list = [get_influence(x, y, theta) for x, y in zip(X1, Y1)]
# Plot including influence values
plt.figure(figsize=(12, 8))
plt.plot(X[Y == 0, 0], X[Y == 0, 1], 'o')
plt.plot(X[Y == 1, 0], X[Y == 1, 1], 'x')
plt.plot(xline, decision_boundary(xline, theta), label='decision boundary')
# Annotate each sample with its influence value
for x, influence in zip(X, influence_list):
    plt.annotate(f"{influence:.1f}", xy=(x[0], x[1]), size=10)
plt.legend()
plt.show()
The figure below shows the influence values of the individual samples plotted together.
It can be seen that the closer the sample is to the decision boundary, the higher the influence value.
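For reference, get_influence evaluates the paper's formula with the logistic loss L(z) = log(1 + exp(-y θᵀx)) for y ∈ {1, -1}, whose gradient and Hessian are

\nabla_\theta L(z) = -y\,\sigma(-y\,\theta^\top x)\,x, \qquad H = \frac{1}{n}\sum_{i=1}^{n} \sigma(\theta^\top x_i)\,\sigma(-\theta^\top x_i)\,x_i x_i^\top

and the value plotted for each sample is the influence summed over all training points treated as test points.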
Mislabeled samples, that is, samples with incorrect labels, can cause the model to learn incorrectly or prevent proper evaluation.
Here, we intentionally mix in some mislabels and investigate what happens to the influence values.
#Intentionally replace some labels
Y[76] = 0
Y[22] = 1
#Scatter plot
plt.scatter(X[:,0][Y == 0], X[:,1][Y == 0], label=iris.target_names[0], marker='o')
plt.scatter(X[:,0][Y == 1], X[:,1][Y == 1], label=iris.target_names[1], marker='x')
plt.legend()
plt.show()
theta = logistic_regression(X,Y)
# The influence function assumes y ∈ {1, -1}, so convert Y accordingly
Y1 = np.copy(Y)
Y1[Y1 == 0] = -1
# Add a column of ones for the bias term
X1 = np.hstack([np.ones([X.shape[0], 1]), X])
# Reuse get_influence defined above (it reads the updated X1 and Y1)
# List of influence values for each sample
influence_list = [get_influence(x, y, theta) for x, y in zip(X1, Y1)]
# Plot including influence values
plt.figure(figsize=(12, 8))
plt.plot(X[Y == 0, 0], X[Y == 0, 1], 'o')
plt.plot(X[Y == 1, 0], X[Y == 1, 1], 'x')
plt.plot(xline, decision_boundary(xline, theta), label='decision boundary')
# Annotate each sample with its influence value
for x, influence in zip(X, influence_list):
    plt.annotate(f"{influence:.1f}", xy=(x[0], x[1]), size=10)
plt.legend()
plt.show()
You can see that the influence values of the mislabeled samples are very high.
In addition, the mislabeled sample in the lower left now lies beyond the decision boundary, raising the concern that mislabeling adversely affects learning.
By using the influence function, we can find both samples close to the decision boundary (where the model is not confident) and mislabeled samples.
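As a minimal sketch of how this could be used in practice (not part of the original code; the top-5 cutoff is an arbitrary choice for illustration), one could rank the samples by influence value to surface mislabel candidates for manual inspection:

# Hypothetical usage sketch: list the most influential samples,
# which are candidates for label checking (top-5 cutoff is arbitrary)
top = np.argsort(influence_list)[::-1][:5]
for i in top:
    print(f"sample {i}: influence = {influence_list[i]:.1f}, label = {Y[i]}")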