# - Pass the data frame as df and the number of clusters as num.
# - Specify an integer random seed in random_state.
def clustering_analytics(df, num, random_state=0):
    """Standardize ``df`` and label every row with a k-means cluster.

    Args:
        df: Data frame of features. It is copied first, so the caller's
            object is left untouched. Every column is fed to
            ``StandardScaler``, so the columns are presumably all numeric.
        num: Number of clusters, forwarded to ``KMeans`` as ``n_clusters``.
        random_state: Integer random seed forwarded to ``KMeans``. Defaults
            to 0, the value that was previously hard-coded, so existing
            two-argument calls behave as before.

    Returns:
        A copy of ``df`` with one additional column, ``"cluster"``, holding
        the k-means label assigned to each row.
    """
    from sklearn.cluster import KMeans
    from sklearn.preprocessing import StandardScaler

    df_temp = df.copy()

    # Standardization: done before the "cluster" column is added, so only
    # the feature columns are scaled and clustered.
    df_std = StandardScaler().fit_transform(df_temp)

    kmeans = KMeans(n_clusters=num, random_state=random_state)
    kmeans.fit(df_std)

    # Labels are computed on the standardized values but attached to the
    # copy that still holds the original (unscaled) feature values.
    df_temp["cluster"] = kmeans.labels_
    return df_temp
# - Pass the data frame as df and the number of principal components as num.
def PCA_analytics(df, num):
    """Standardize ``df``, fit a PCA on it and return the projected data.

    As a side effect, prints the fitted principal components, the per-feature
    mean seen by the PCA, the covariance matrix estimated by the PCA, and the
    eigenvectors and eigenvalues of that covariance matrix.

    Args:
        df: Data frame of features. Every column is fed to
            ``StandardScaler``, so the columns are presumably all numeric.
        num: Number of principal components, forwarded to ``PCA`` as
            ``n_components``.

    Returns:
        A new ``pandas.DataFrame`` built from the transformed array. No index
        or column labels are passed, so it carries pandas' default integer
        labels rather than the index of ``df``.
    """
    import numpy as np
    # pandas was referenced as ``pd`` without ever being imported; import it
    # here, alongside the other function-scope imports.
    import pandas as pd
    from sklearn.decomposition import PCA
    from sklearn.preprocessing import StandardScaler

    df_temp = df.copy()

    # Standardization
    df_std = StandardScaler().fit_transform(df_temp)

    pca = PCA(n_components=num)
    pca.fit(df_std)
    pca_df = pd.DataFrame(pca.transform(df_std))

    print('components, main components')
    print(pca.components_)
    print('mean, mean')
    print(pca.mean_)

    # Compute the covariance once and reuse it for printing and for the
    # eigen-decomposition below.
    covariance = pca.get_covariance()
    print('covariance, covariance matrix')
    print(covariance)

    # NOTE(review): a covariance matrix is symmetric, so np.linalg.eigh would
    # also work and return sorted real eigenvalues; np.linalg.eig is kept so
    # the printed ordering stays exactly as before.
    eigenvalues, eigenvectors = np.linalg.eig(covariance)
    print('eigenvector, eigenvector')
    print(eigenvectors)
    print('eigenvalue, eigenvalue')
    print(eigenvalues)

    return pca_df
Recommended Posts