This is a summary of methods that are frequently used in machine learning. It will be corrected and updated as needed.
StandardScaler
from sklearn.preprocessing import StandardScaler

# Standardize the columns of pd_sample.
# fit() computes the scaling parameters (mean, standard deviation, etc.) and
# returns the scaler itself, so instance creation and fitting can be chained.
scaler = StandardScaler().fit(pd_sample)
# transform() applies the learned parameters to the data.
pd_sample_sc = scaler.transform(pd_sample)
# Both steps can also be executed in a single call:
# pd_sample_sc = scaler.fit_transform(pd_sample)
get_dummies
# One-hot encode pd_sample with the pandas.get_dummies() function.
# NOTE: pd is assumed to be pandas (import pandas as pd), imported elsewhere.
pd_sample = pd.get_dummies(data=pd_sample)
train_test_split
from sklearn.model_selection import train_test_split

# Split the features X and the target y into training and evaluation sets.
# random_state is pinned (as the KMeans and DecisionTreeClassifier examples in
# this file do) so that the split, and every score computed from it, is
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
KMeans
from sklearn.cluster import KMeans

# K-means model definition: 4 clusters, with random_state fixed so the
# result is reproducible.
kmeans = KMeans(n_clusters=4, random_state=0)
# Run the clustering; fit() returns the fitted model itself.
clusters = kmeans.fit(pd_sample)
# Store the cluster label assigned to each row.
# NOTE: this adds a 'cluster' column to pd_sample in place; drop it before
# feeding pd_sample to another model.
pd_sample['cluster'] = clusters.labels_
PCA
from sklearn.decomposition import PCA

# PCA model definition (keep 2 components) and principal component analysis;
# fit() returns the fitted model, so both steps are chained.
pca = PCA(n_components=2).fit(pd_sample)
# transform() returns an array-type object, so wrap the projected data back
# into a DataFrame.
x_pca = pd.DataFrame(pca.transform(pd_sample))
# Fitting and conversion can also be executed in a single call:
# x_pca = pca.fit_transform(pd_sample)
LinearRegression
from sklearn.linear_model import LinearRegression

# Model initialization and training.
model = LinearRegression()
model.fit(X_train, y_train)
# Score (coefficient of determination, R^2) on the training data and on the
# evaluation data.
print(model.score(X_train, y_train))
print(model.score(X_test, y_test))
# Output the regression coefficient of each explanatory variable.
# NOTE: assumes X is a DataFrame and y is a single target column, so that
# model.coef_ is one-dimensional -- confirm against the caller's data.
coef = pd.DataFrame({"feature_names": X.columns, "coefficient": model.coef_})
print(coef)
# Predict regression values for unknown data.
print(model.predict(x_pred))
DecisionTreeClassifier
from sklearn.tree import DecisionTreeClassifier

# Model initialization and training; random_state=0 keeps the tree
# construction reproducible, and fit() returns the fitted model.
model = DecisionTreeClassifier(random_state=0).fit(X_train, y_train)
# Accuracy on the training data, then on the evaluation data.
print(model.score(X_train, y_train))
print(model.score(X_test, y_test))
# Output the importance (degree of contribution) of each explanatory variable.
# The column keeps the name "coefficient" although it holds feature importances.
importance = pd.DataFrame(
    {
        "feature_names": X.columns,
        "coefficient": model.feature_importances_,
    }
)
print(importance)
# Predict class labels for unknown data.
print(model.predict(x_pred))
# Output the predicted probability of each class (0 / 1).
print(model.predict_proba(x_pred))
# Accuracy = (TP + TN) / (TP + FN + FP + TN)
model.score(X_test, y_test)
# Confusion matrix.
# confusion_matrix() expects (y_true, y_pred), so the true labels are compared
# with the model's predictions for X_test -- not with the feature matrix.
from sklearn.metrics import confusion_matrix
matrix = confusion_matrix(y_test, model.predict(X_test))
# Heat map of the confusion matrix: rows are the true labels (Target) and
# columns are the predicted labels (Prediction).
# NOTE: plt is assumed to be matplotlib.pyplot (import matplotlib.pyplot as plt),
# imported elsewhere.
import seaborn as sns
sns.heatmap(matrix, annot=True, cmap='Blues')
plt.xlabel('Prediction')
plt.ylabel('Target')
plt.show()
# Precision = TP / (TP + FP)
# precision_score() expects (y_true, y_pred): pass the true labels and the
# model's predictions for X_test, not the feature matrix.
# NOTE: the default settings assume a binary (0 / 1) target.
from sklearn.metrics import precision_score
precision_score(y_test, model.predict(X_test))
# Recall = TP / (TP + FN)
# recall_score() expects (y_true, y_pred): pass the true labels and the
# model's predictions for X_test, not the feature matrix.
# NOTE: the default settings assume a binary (0 / 1) target.
from sklearn.metrics import recall_score
recall_score(y_test, model.predict(X_test))
# F1 score = 2 * (Precision * Recall) / (Precision + Recall)
# f1_score() expects (y_true, y_pred): pass the true labels and the
# model's predictions for X_test, not the feature matrix.
# NOTE: the default settings assume a binary (0 / 1) target.
from sklearn.metrics import f1_score
f1_score(y_test, model.predict(X_test))