# Import the required libraries
from keras.datasets import mnist
import numpy as np
import pandas as pd
import sklearn
# Render plots inline when running inside a Jupyter notebook
import matplotlib.pyplot as plt
%matplotlib inline
from keras.layers import Input, Dense
from keras.models import Model
from keras import backend as K
import gc
Using TensorFlow backend.
# Bottleneck widths to compare: 8, 16, 24 and 32 units.
feature_dims = range(8, 33, 8)
display([*feature_dims])
[8, 16, 24, 32]
# Load MNIST through Keras; the loader returns a training split and a test split.
(x_train, y_train), (x_test, y_test) = mnist.load_data()
# Flatten every image into a single row vector. The sample count is taken
# from the array itself instead of being hard-coded.
x_train = x_train.reshape(len(x_train), -1)
x_test = x_test.reshape(len(x_test), -1)
# Convert to float32 so the pixel values can be scaled.
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
# Divide by 255 to bring pixel values into [0, 1].
x_train /= 255
x_test /= 255
# One-hot encoding helper. `keras.utils.to_categorical` is the import path
# exposed by Keras 2, which this file already targets.
from keras.utils import to_categorical
# 10 classes
num_classes = 10
y_train = y_train.astype('int32')
y_test = y_test.astype('int32')
# Keep the integer test labels before y_test is replaced by its one-hot form.
labels = y_test
# One-hot encode the targets.
y_train = to_categorical(y_train, num_classes)
y_test = to_categorical(y_test, num_classes)
def fitting(feature_dim, x_train, y_train, x_test, y_test):
    """Train a dense autoencoder with a `feature_dim`-unit bottleneck.

    The network is 784 -> 256 -> 64 -> feature_dim -> 64 -> 256 -> 784 and is
    trained to reconstruct `x_train`, with `x_test` used as validation data.

    Args:
        feature_dim: Number of units in the bottleneck ('encoded') layer.
        x_train: Training inputs; also used as the training targets.
        y_train: Accepted for signature compatibility; not used here.
        x_test: Held-out inputs used for validation and for the predictions.
        y_test: Accepted for signature compatibility; not used here.

    Returns:
        A tuple `(history, autoencoder, result)` where `result` is the list
        `[reconstructions_of_x_test, encoded_features_of_x_test]`.
    """
    # Model construction
    layer_name = 'encoded'
    input_img = Input(shape=(784,))
    x1 = Dense(256, activation='relu')(input_img)
    x2 = Dense(64, activation='relu')(x1)
    encoded = Dense(feature_dim, activation='relu', name=layer_name)(x2)
    x3 = Dense(64, activation='relu')(encoded)
    x4 = Dense(256, activation='relu')(x3)
    decoded = Dense(784, activation='sigmoid')(x4)

    # Keras 2 keyword names (`inputs`/`outputs`), matching z_layer_model below.
    autoencoder = Model(inputs=input_img, outputs=decoded)
    # Sub-model that exposes the bottleneck activations.
    z_layer_model = Model(inputs=autoencoder.input,
                          outputs=autoencoder.get_layer(layer_name).output)
    autoencoder.compile(optimizer='adadelta', loss='binary_crossentropy')
    autoencoder.summary()

    # Training (`epochs` is the Keras 2 name of the former `nb_epoch`).
    history = autoencoder.fit(x_train, x_train,
                              epochs=40,
                              batch_size=256,
                              shuffle=True,
                              validation_data=(x_test, x_test))

    # Compute both outputs before the backend session is cleared.
    result = [autoencoder.predict(x_test), z_layer_model.predict(x_test)]

    # NOTE(review): the session is cleared before `autoencoder` is returned;
    # presumably the returned model should not be used for further
    # predictions -- confirm, and rely on `result` instead.
    K.clear_session()
    gc.collect()

    # Wipe the notebook cell output produced by summary() and fit().
    from IPython.display import clear_output
    clear_output()
    return (history, autoencoder, result)
#model = fitting(10, x_train, y_train, x_test, y_test)
# Train one autoencoder per bottleneck width and collect what each run returns.
histories, models, dec_imgs = [], [], []
for dim in feature_dims:
    run_history, run_model, run_outputs = fitting(dim, x_train, y_train, x_test, y_test)
    histories.append(run_history)
    models.append(run_model)
    dec_imgs.append(run_outputs)
# Placeholder list, one slot per bottleneck width; filled in later.
results = [None] * len(feature_dims)
# For every bottleneck width, show test images (top row) above their
# reconstructions (bottom row).
n = 10
for dim, outputs in zip(feature_dims, dec_imgs):
    print(dim)
    reconstructed = outputs[0]
    plt.figure(figsize=(10, 2))
    for j in range(n):
        # row 0: original test image, row 1: reconstructed image
        for row, pixels in enumerate((x_test[j], reconstructed[j])):
            ax = plt.subplot(2, n, j + 1 + row * n)
            plt.imshow(pixels.reshape(28, 28))
            plt.gray()
            ax.get_xaxis().set_visible(False)
            ax.get_yaxis().set_visible(False)
    plt.show()
8
16
24
32
# The second element of each run's outputs is the bottleneck-layer prediction.
results = [outputs[1] for outputs in dec_imgs]
#model.save('model/mnist-10')
#model = keras.models.load_model('model/mnist-10')
#for i in range(len(feature_dims)):
# models[i].pop() #Supprimez la couche softmax à l'étape finale et utilisez la couche d'entités comme étape finale.
# models[i].summary()
#result = model.predict(x_test)
#results = [None] * len(feature_dims)
#for i in range(len(feature_dims)):
# keras.backend.clear_session()
# results[i] = models[i].predict(x_test)
def tsne(result):
    """Reduce `result` to two dimensions with t-SNE and return the embedding."""
    from sklearn.manifold import TSNE

    # NOTE(review): newer scikit-learn releases reportedly rename `n_iter`
    # to `max_iter` -- confirm against the installed version.
    settings = dict(n_components=2, random_state=0, perplexity=30, n_iter=1000)
    reducer = TSNE(**settings)
    return reducer.fit_transform(result)
#tsne = tsne(result)
# One 2-D t-SNE embedding per bottleneck width.
tsnes = [tsne(encoded) for encoded in results]
#df = pd.DataFrame(tsne, columns = ['x', 'y'])
#df['label'] = labels
def km(n_clusters, result):
    """Cluster `result` with k-means and return the cluster index of each row."""
    from sklearn.cluster import KMeans

    estimator = KMeans(n_clusters)
    return estimator.fit_predict(result)
#km = km(10, result)
#df['km'] = km
# 10-cluster k-means assignment for each set of encoded features.
kms = [km(10, encoded) for encoded in results]
def DBSCAN(n_clusters, result):
    """Run DBSCAN (eps=0.2) on `result` and return the per-sample labels.

    Note that `n_clusters` is passed to the estimator as `min_samples`.
    """
    # Alias the estimator so it is not confused with this wrapper's own name.
    from sklearn.cluster import DBSCAN as SklearnDBSCAN

    fitted = SklearnDBSCAN(eps=0.2, min_samples=n_clusters).fit(result)
    return fitted.labels_
#dbscan = DBSCAN(20, result)
#df['DBSCAN'] = dbscan
def hierarchy(result):
    """Return the SciPy linkage matrix for `result`.

    Uses average linkage with the Bray-Curtis metric.
    """
    from scipy.cluster.hierarchy import linkage

    # Alternatives the author left disabled:
    #   metric: canberra, chebyshev, cityblock, correlation, cosine,
    #           euclidean, hamming, jaccard
    #   method: single, complete, weighted
    linkage_options = {'metric': 'braycurtis', 'method': 'average'}
    return linkage(result, **linkage_options)
#hierarchy = hierarchy(result)
#display(hierarchy)
def label_to_colors(label, palette=None):
    """Map every distinct value in `label` to a color.

    Args:
        label: Array-like of labels, one per sample.
        palette: Optional iterable of color specifications. When omitted, the
            'color' entries of matplotlib's `axes.prop_cycle` are used.

    Returns:
        An object-dtype array with the same shape as `label`, holding the
        color assigned to each sample's label.

    Raises:
        ValueError: If the palette contains no colors.
    """
    from itertools import cycle

    label = np.asarray(label)
    if palette is None:
        palette = [entry['color'] for entry in plt.rcParams['axes.prop_cycle']]
    else:
        palette = list(palette)
    if not palette:
        raise ValueError("palette must contain at least one color")

    # `object` replaces the `np.object` alias, which newer NumPy no longer has.
    colors = np.empty(label.shape, dtype=object)
    # Cycle through the palette so that having more distinct labels than
    # colors reuses colors instead of leaving entries unassigned.
    for value, color in zip(np.unique(label), cycle(palette)):
        colors[label == value] = color
    return colors
#def cluster_visualization(x, y, label, cluster, method, n_clusters):
def cluster_visualization(x, y, label, cluster):
    """Draw the same points twice, side by side.

    The left panel is colored by `label`, the right panel by `cluster`.
    """
    plt.figure(figsize=(30, 15))
    for panel, coloring in enumerate((label, cluster), start=1):
        plt.subplot(1, 2, panel)
        plt.scatter(x, y, c=label_to_colors(coloring))
# Plot true labels against k-means clusters on each t-SNE embedding.
for embedding, assignment in zip(tsnes, kms):
    cluster_visualization(embedding[:, 0], embedding[:, 1], labels, assignment)
# https://qiita.com/mamika311/items/75c24f6892f85593f7e7
from sklearn.metrics.cluster import adjusted_rand_score
# adjusted_rand_score is the Adjusted Rand Index, so the printed tag is "ARI"
# (it was previously printed as "RMI").
for dim, assignment in zip(feature_dims, kms):
    print("dim:" + str(dim) + " ARI: " + str(adjusted_rand_score(labels, assignment)))
dim:8 RMI: 0.3987309485653015
dim:16 RMI: 0.40738458796211546
dim:24 RMI: 0.3677837864385967
dim:32 RMI: 0.43182464556112676
# https://scikit-learn.org/stable/modules/generated/sklearn.metrics.normalized_mutual_info_score.html
# https://qiita.com/kotap15/items/38289edfe822005e1e44
from sklearn.metrics import normalized_mutual_info_score
# Normalized mutual information between the true labels and each clustering.
for dim, assignment in zip(feature_dims, kms):
    nmi = normalized_mutual_info_score(labels, assignment)
    print("dim:" + str(dim) + " NMI: " + str(nmi))
dim:8 NMI: 0.525123015401584
dim:16 NMI: 0.5452028060642871
dim:24 NMI: 0.5173700351804098
dim:32 NMI: 0.5592638372411443
def shilhouette(clusters, x_test):
    """Draw a silhouette plot of `x_test` grouped by `clusters`.

    Each cluster is drawn as a block of horizontal bars, one bar per sample,
    sorted by silhouette value. A dashed red line marks the mean silhouette
    value over all samples. The size of each cluster is printed as it is
    drawn.

    Args:
        clusters: Cluster assignment for each row of `x_test`.
        x_test: Samples on which the Euclidean silhouette values are computed.
    """
    from sklearn.metrics import silhouette_samples
    from matplotlib import cm

    clusters = np.asarray(clusters)
    plt.figure(figsize=(10, 10))
    cluster_labels = np.unique(clusters)
    n_clusters = cluster_labels.shape[0]
    silhouette_vals = silhouette_samples(x_test, clusters, metric='euclidean')

    y_ax_lower = 0
    yticks = []
    for i, c in enumerate(cluster_labels):
        c_silhouette_vals = np.sort(silhouette_vals[clusters == c])
        print(len(c_silhouette_vals))
        y_ax_upper = y_ax_lower + len(c_silhouette_vals)
        color = cm.jet(float(i) / n_clusters)
        plt.barh(range(y_ax_lower, y_ax_upper),
                 c_silhouette_vals,
                 height=1.0,
                 edgecolor='none',
                 color=color)
        # Remember the vertical center of this cluster's block for its tick.
        yticks.append((y_ax_lower + y_ax_upper) / 2.)
        y_ax_lower = y_ax_upper

    # A silhouette coefficient close to 1 means the sample is well clustered.
    # Bars of similar thickness across clusters indicate that the data set is
    # split into evenly sized groups.
    # One way to tune k is to look for the value that makes the blocks
    # similarly wide while pushing the coefficients toward 1.
    # Draw a line at the mean silhouette value.
    silhouette_avg = np.mean(silhouette_vals)
    plt.axvline(silhouette_avg, color="red", linestyle="--")
    # Apply the collected tick positions so the "Cluster" axis is labeled by
    # cluster id rather than by sample index.
    plt.yticks(yticks, cluster_labels)
    plt.ylabel("Cluster")
    plt.xlabel("Silhouette coefficient")
# One silhouette plot per k-means clustering, evaluated on the test inputs.
for assignment in kms:
    shilhouette(assignment, x_test)
1077
1368
1273
824
854
1070
1251
848
758
677
1047
1660
869
824
1400
532
926
770
1314
658
793
784
929
1452
889
733
1592
1381
521
926
1503
843
810
1343
500
908
1559
744
973
817
Recommended Posts