**It can also be used for regression, but here we will work through a classification case.**
import numpy as np
import pandas as pd
from sklearn import datasets
# sklearn.neighbors module k-NN method
from sklearn.neighbors import KNeighborsClassifier
#sklearn data split utility
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
#Method to generate color map
from matplotlib.colors import ListedColormap
#Japanese display module of matplotlib
!pip install japanize-matplotlib
import japanize_matplotlib
| | Variable name | Meaning | Note | Data type |
|---|---|---|---|---|
| 0 | species | Type | Setosa=0, Versicolour=1, Virginica=2 | int64 |
| 1 | sepal length | Sepal length | Continuous quantity (cm) | float64 |
| 2 | sepal width | Sepal width | Continuous quantity (cm) | float64 |
| 3 | petal length | Petal length | Continuous quantity (cm) | float64 |
| 4 | petal width | Petal width | Continuous quantity (cm) | float64 |
# Load the iris dataset and print an overview of its contents.
iris = datasets.load_iris()

# Each entry is (caption, value); they are printed in order below.
summary = (
    # Explanatory variables (features)
    ("label:\n", iris.feature_names),
    ("shape:\n", iris.data.shape),
    ("First 10 lines:\n", iris.data[:10]),
    # Objective variable (type)
    ("label:\n", iris.target_names),
    ("shape:\n", iris.target.shape),
    ("Full display:\n", iris.target),
)
for caption, value in summary:
    print(caption, value)
# Split features and labels into training and test parts.
# stratify=iris.target requests stratified sampling on the labels;
# random_state=0 fixes the shuffle so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    iris.data, iris.target, random_state=0, stratify=iris.target
)
`stratify = iris.target` specifies **stratified sampling** by type (`iris.target`). The default is random sampling, so here we split the data so that both the training set and the test set retain the composition ratio of the three types.
print("shape:", y_train.shape)
# Count the training samples per class: np.unique with return_counts=True
# yields the distinct labels in y_train together with how often each occurs.
# NOTE: this is a bare expression, so the result is only shown in an
# interactive/notebook session; in a plain script it is discarded.
np.unique(y_train, return_counts=True)
# Accuracy (correct answer rate) on the training and test sets, one entry per k
training_accuracy = []
test_accuracy = []

# Run k-NN for every k from 3 to 20 and record both accuracies
for k in range(3, 21):
    # Create a classifier with k neighbours and fit it to the training data
    kNN = KNeighborsClassifier(n_neighbors=k)
    kNN.fit(X_train, y_train)
    # score() gives the accuracy on the supplied data; store it in order of k
    training_accuracy.append(kNN.score(X_train, y_train))
    test_accuracy.append(kNN.score(X_test, y_test))

# Convert the accuracy lists to numpy arrays
training_accuracy = np.array(training_accuracy)
test_accuracy = np.array(test_accuracy)
# x-axis data shared by both figures: the k values tried and the tick positions
k_values = range(3, 21)
k_ticks = np.arange(2, 21, 1)

# Figure 1: accuracy on the training and test sets as k changes
plt.figure(figsize=(6, 4))
for scores, series_name in ((training_accuracy, 'Training'), (test_accuracy, 'test')):
    plt.plot(k_values, scores, label=series_name)
plt.xticks(k_ticks)
plt.xlabel('k number')
plt.ylabel('Correct answer rate')
plt.title('Transition of correct answer rate')
plt.grid()
plt.legend()

# Figure 2: absolute gap between training and test accuracy as k changes
difference = np.abs(training_accuracy - test_accuracy)
plt.figure(figsize=(6, 4))
plt.plot(k_values, difference, label='Difference')
plt.xticks(k_ticks)
plt.xlabel('k number')
plt.ylabel('Difference(train - test)')
plt.title('Transition of difference in correct answer rate')
plt.grid()
plt.legend()

# Display both figures
plt.show()
# Number of neighbours used for the final model
k = 15

# X: the first two feature columns only; y: the class labels
X, y = iris.data[:, :2], iris.target

# Build the k-NN classifier, then fit it to X and y
model = KNeighborsClassifier(n_neighbors=k)
model.fit(X, y)
Next, compute `Z`, which is used to draw the boundaries of each group on a two-dimensional plane.
#Specify mesh spacing
# Mesh spacing
h = 0.02

# Colour maps: one for the filled regions, one for the scatter points
cmap_surface = ListedColormap(['darkseagreen', 'mediumpurple', 'gold'])
cmap_dot = ListedColormap(['darkgreen', 'darkslateblue', 'olive'])

# Plot range: the min/max of each of the two features, widened by 1 on each side
x_min = X[:, 0].min() - 1
x_max = X[:, 0].max() + 1
y_min = X[:, 1].min() - 1
y_max = X[:, 1].max() + 1

# Grid coordinates covering the plot range at spacing h
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))

# Flatten the grid into two columns (one row per grid point), predict a class
# for every point, then restore the grid shape
grid_points = np.column_stack((xx.ravel(), yy.ravel()))
Z = model.predict(grid_points).reshape(xx.shape)
Flatten `xx` and `yy` to one dimension with the `ravel()` function, combine them with numpy's `c_`, and pass the result to the model to predict.
plt.figure(figsize=(6,5))
# Filled regions: colour every mesh cell by the class stored in Z
plt.pcolormesh(xx, yy, Z, cmap=cmap_surface)

# Overlay the samples, coloured by their label y
first_feature, second_feature = X[:, 0], X[:, 1]
plt.scatter(first_feature, second_feature, c=y, cmap=cmap_dot, s=30)

# Limit the axes to the extent of the mesh and label them
plt.xlim(xx.min(), xx.max())
plt.xlabel('sepal length')
plt.ylim(yy.min(), yy.max())
plt.ylabel('sepal width')
plt.show()
The `pcolormesh()` function produces a color plot based on a non-regular rectangular grid. Its arguments are `(x, y, Z, c)`: from the left, `x` and `y` are the coordinates of the mesh, and the data `Z`, which holds the group information for each cell, is assigned a color with `c` (the colormap, passed as `cmap` above).
Recommended Posts