Using scikit-learn would make this a one-liner, but implementing it yourself is a good learning exercise.
In the original article, multiclass classification and train_test_split were out of scope, so
the problem is simplified to binary classification and evaluated on the same data used for training.
For details, see the following article: Machine Learning from Scratch, Part 4: "Logistic Regression".
from sklearn.datasets import load_iris
import numpy as np
iris = load_iris()
X_org = iris.data
y_org = iris.target
#Simplify the problem (see previous article)
idx = [y_org != 2]
X = X_org[idx][:,:2]
y = y_org[idx]
class LogisticRegression:
def __init__(self, n_feature=2):
self.w = np.random.rand(n_feature + 1) #Bias term
self.eta = 1e-2 #Learning rate
self.n_iter = 1000 #Number of parameter updates
self.loss = []
def fit(self, x, y): #Learning
for i in range(self.n_iter):
self._update(x, y)
def predict(self, x): #Forecast
x = np.c_[x,np.ones(len(x))] #Bias term
return self._forward(x)
def _forward(self, x): #Internal method.
d = np.dot(x, self.w)
return np.exp(d) / (1 + np.exp(d))
def _diff(self, x, y): #Internal method. Calculate the difference between the output and the correct answer.
diff = self._forward(x) - y
return diff
def _update(self, x, y): #Internal method. Update the parameter once.
x = np.c_[x,np.ones(len(x))] #Bias term
self.w -= self.eta * np.dot(self._diff(x, y), x) #Take the inner product and erase the dimension of the number of samples
lr = LogisticRegression()
lr.fit(X,y) #Learning
pred = (lr.predict(X) > 0.5).astype(np.int) #Forecast and evaluation
print (np.mean(pred == y)) #Average accuracy rate
The next article implements train_test_split:
Machine Learning from Scratch, Part 5: "Training Data and Test Data".
Recommended Posts