A collection of visualization snippets for working more efficiently in data analysis competitions. More will be added over time.
Display the pairwise correlation between the columns of a pandas DataFrame as a heatmap. This is useful for checking the correlation between features, and between the predictions of the individual models in an ensemble.
-
# Compute the correlation matrix once and reuse it for both the heatmap
# and the tabular output instead of calling df.corr() twice.
corr_matrix = df.corr()
fig, ax = plt.subplots(1, 1, figsize=(12, 12))
# annot=True writes each coefficient into its cell, formatted to 7 decimals.
sns.heatmap(corr_matrix, annot=True, fmt='.7f', ax=ax)
# Bare expression kept from the original snippet; in a notebook cell the
# last expression is what gets displayed as a table.
corr_matrix
-
import numpy as np
import pandas as pd
from scipy import signal
from sklearn.metrics import confusion_matrix, f1_score, plot_confusion_matrix
# Thanks to https://www.kaggle.com/marcovasquez/basic-nlp-with-tensorflow-and-wordcloud
def plot_cm(y_true, y_pred, title="", figsize=(14, 14)):
    """Plot a confusion matrix as a heatmap annotated with percentages and counts.

    Rows are the actual labels and columns the predicted labels. Each cell
    shows the percentage of its row total; diagonal cells additionally show
    ``count/row_total`` and off-diagonal cells show the raw count. Cells with
    a zero count off the diagonal are left blank.

    Expects ``plt`` (matplotlib.pyplot), ``sns`` (seaborn) and
    ``confusion_matrix`` (sklearn.metrics) to be available at module level.

    Args:
        y_true: Ground-truth labels. The set of classes shown is taken from
            the unique values of this argument.
        y_pred: Predicted labels; converted to an integer array before use.
        title: Title drawn above the heatmap.
        figsize: Figure size passed to ``plt.subplots``.
    """
    # np.asarray lets callers pass plain lists as well as arrays / Series.
    y_pred = np.asarray(y_pred).astype(int)
    # Computed once: used for the matrix labels and for both DataFrame axes.
    labels = np.unique(y_true)

    cm = confusion_matrix(y_true, y_pred, labels=labels)
    cm_sum = np.sum(cm, axis=1, keepdims=True)
    cm_perc = cm / cm_sum.astype(float) * 100

    # Object dtype avoids the silent truncation a fixed-width unicode array
    # would apply to long annotation strings.
    annot = np.empty(cm.shape, dtype=object)
    nrows, ncols = cm.shape
    for i in range(nrows):
        for j in range(ncols):
            c = cm[i, j]
            p = cm_perc[i, j]
            if i == j:
                # Index down to a scalar; formatting a 1-element array with
                # %d relies on a deprecated NumPy conversion.
                s = cm_sum[i, 0]
                annot[i, j] = '%.1f%%\n%d/%d' % (p, c, s)
            elif c == 0:
                annot[i, j] = ''
            else:
                annot[i, j] = '%.1f%%\n%d' % (p, c)

    cm = pd.DataFrame(cm, index=labels, columns=labels)
    cm.index.name = 'Actual'
    cm.columns.name = 'Predicted'

    fig, ax = plt.subplots(figsize=figsize)
    ax.set_title(title)
    # fmt='' because the annotations are already fully formatted strings.
    sns.heatmap(cm, cmap='viridis', annot=annot, fmt='', ax=ax)
Visualize the feature importance of trained LightGBM models.
def display_feature_importance(models):
    """Plot gain-based feature importance averaged over several models.

    For every model, ``feature_importance(importance_type='gain')`` and
    ``feature_name()`` are collected; importances are then averaged per
    feature, drawn as a horizontal bar plot sorted from most to least
    important, and the 20 lowest-ranked feature names are printed.

    Expects ``plt`` (matplotlib.pyplot) and ``sns`` (seaborn) to be available
    at module level.

    Args:
        models: Iterable of objects exposing ``feature_importance`` and
            ``feature_name`` (presumably LightGBM boosters, one per fold --
            confirm against the caller).
    """
    frames = [
        pd.DataFrame({
            'importance': m.feature_importance(importance_type='gain'),
            'feature': m.feature_name(),
        })
        for m in models
    ]
    if frames:
        # Single concat instead of growing a frame inside the loop.
        fi = pd.concat(frames, axis=0)
    else:
        # pd.concat rejects an empty list; keep an empty, correctly typed frame.
        fi = pd.DataFrame({
            'importance': pd.Series(dtype=float),
            'feature': pd.Series(dtype=object),
        })

    # Mean (not sum) so the values match the "avg over folds" plot title.
    fi = fi.groupby('feature').mean()
    best_features = fi.sort_values(by='importance', ascending=False).reset_index()

    plt.figure(figsize=(16, 16))
    sns.barplot(x="importance", y="feature", data=best_features)
    plt.title('LGB Features (avg over folds)')
    # The frame is sorted descending, so the tail holds the weakest features.
    print('worst:\n', best_features['feature'][-20:].values)
Recommended Posts