24e
Seaborn
import seaborn as sns
%matplotlib inline
import pandas as pd
# Load train.csv and keep only the rows that have an Age value.
df = pd.read_csv('train.csv').dropna(subset=['Age'])
# Plot the distribution of the Age column.
# NOTE(review): distplot is deprecated in seaborn >= 0.11 (histplot/displot
# replace it) — confirm the installed seaborn version before upgrading.
sns.distplot(df['Age'])
<matplotlib.axes._subplots.AxesSubplot at 0x7fcbcdd7e3d0>
# Apply seaborn's default settings, then redraw the Age distribution with 50 bins.
sns.set()
sns.distplot(df['Age'], bins=50)
<matplotlib.axes._subplots.AxesSubplot at 0x7fcbcdd05850>
Voir la distribution conjointe de deux variables avec sns.jointplot()
# Joint plot of the Age and Fare columns of df.
sns.jointplot(x='Age', y='Fare', data=df)
<seaborn.axisgrid.JointGrid at 0x7fcbcdbe31d0>
# Same Age/Fare joint plot, drawn with kind='hex'.
sns.jointplot(x='Age', y='Fare', data=df, kind='hex')
<seaborn.axisgrid.JointGrid at 0x7fcbcdb2b890>
(Important) Affichez la distribution de plusieurs colonnes en une seule fois avec sns.pairplot ()
# Pairwise scatter plots of Age, Fare and Pclass, coloured by Survived,
# with alpha=0.5 passed through plot_kws.
# NOTE(review): the RuntimeWarnings in the output presumably come from the
# diagonal KDE on the discrete Pclass column; diag_kind='hist' may avoid
# them — confirm.
sns.pairplot(df[['Age', 'Fare', 'Pclass', 'Survived']], hue='Survived', kind='scatter', plot_kws={'alpha': 0.5})
/opt/anaconda3/lib/python3.7/site-packages/statsmodels/nonparametric/kde.py:487: RuntimeWarning: invalid value encountered in true_divide
binned = fast_linbin(X, a, b, gridsize) / (delta * nobs)
/opt/anaconda3/lib/python3.7/site-packages/statsmodels/nonparametric/kdetools.py:34: RuntimeWarning: invalid value encountered in double_scalars
FAC1 = 2*(np.pi*bw/RANGE)**2
<seaborn.axisgrid.PairGrid at 0x7fcbcda34990>
25ème
sns.barplot() : créer un graphique à barres
import numpy as np
import pandas as pd
import seaborn as sns
%matplotlib inline
# Reload train.csv (this time without dropping rows) and show the first rows.
df = pd.read_csv('train.csv')
df.head()
|
PassengerId |
Survived |
Pclass |
Name |
Sex |
Age |
SibSp |
Parch |
Ticket |
Fare |
Cabin |
Embarked |
0 |
1 |
0 |
3 |
Braund, Mr. Owen Harris |
male |
22.0 |
1 |
0 |
A/5 21171 |
7.2500 |
NaN |
S |
1 |
2 |
1 |
1 |
Cumings, Mrs. John Bradley (Florence Briggs Th... |
female |
38.0 |
1 |
0 |
PC 17599 |
71.2833 |
C85 |
C |
2 |
3 |
1 |
3 |
Heikkinen, Miss. Laina |
female |
26.0 |
0 |
0 |
STON/O2. 3101282 |
7.9250 |
NaN |
S |
3 |
4 |
1 |
1 |
Futrelle, Mrs. Jacques Heath (Lily May Peel) |
female |
35.0 |
1 |
0 |
113803 |
53.1000 |
C123 |
S |
4 |
5 |
0 |
3 |
Allen, Mr. William Henry |
male |
35.0 |
0 |
0 |
373450 |
8.0500 |
NaN |
S |
# Bar plot of Age for each Survived value.
sns.barplot(x='Survived', y='Age', data=df) # mean value
<matplotlib.axes._subplots.AxesSubplot at 0x7fcbcdd7e390>
# Same bar plot, with np.median as the estimator.
sns.barplot(x='Survived', y='Age', data=df, estimator=np.median) # median
<matplotlib.axes._subplots.AxesSubplot at 0x7fcbcce3a190>
sns.countplot() : comparer le nombre d'observations par catégorie
# Count rows per Sex value, split by Survived through hue.
sns.countplot(x='Sex', data=df, hue='Survived')
<matplotlib.axes._subplots.AxesSubplot at 0x7fcbccfb0c10>
sns.boxplot () Comparer les valeurs par catégorie
# Box plot of Age for each Pclass value.
sns.boxplot(x='Pclass', y='Age', data=df)
<matplotlib.axes._subplots.AxesSubplot at 0x7fcbccd82750>
# Same box plot, additionally split by Survived through hue.
sns.boxplot(x='Pclass', y='Age', data=df, hue='Survived')
<matplotlib.axes._subplots.AxesSubplot at 0x7fcbcc2a4ed0>
sns.violinplot() : visualisez la distribution des données
# Violin plot of Age for each Pclass value.
sns.violinplot(x='Pclass', y='Age', data=df)
<matplotlib.axes._subplots.AxesSubplot at 0x7fcbcc1e3ad0>
# Violin plot of Age per Pclass, split by Survived through hue.
sns.violinplot(x='Pclass', y='Age', data=df, hue='Survived')
<matplotlib.axes._subplots.AxesSubplot at 0x7fcbcc107210>
sns.swarmplot() : voir la distribution réelle des points de données
# Swarm plot of Age for each Pclass value.
sns.swarmplot(x='Pclass', y='Age', data=df)
<matplotlib.axes._subplots.AxesSubplot at 0x7fcbcc19a710>
# Same swarm plot with marker size 4, coloured by Survived through hue.
sns.swarmplot(x='Pclass', y='Age', data=df, size=4, hue='Survived')
<matplotlib.axes._subplots.AxesSubplot at 0x7fcbc6e9fe50>
26e
Dessiner une carte thermique
Faire une corrélation avec df.corr ()
import pandas as pd
# Reload train.csv so the correlation is computed on the full, unfiltered data.
df = pd.read_csv('train.csv')
# Correlate the numeric columns only. The frame also holds text columns
# (Name, Sex, Ticket, ...); recent pandas versions raise on those instead of
# silently skipping them, so select the numeric columns explicitly.
corr = df.select_dtypes(include='number').corr()
corr
|
PassengerId |
Survived |
Pclass |
Age |
SibSp |
Parch |
Fare |
PassengerId |
1.000000 |
-0.005007 |
-0.035144 |
0.036847 |
-0.057527 |
-0.001652 |
0.012658 |
Survived |
-0.005007 |
1.000000 |
-0.338481 |
-0.077221 |
-0.035322 |
0.081629 |
0.257307 |
Pclass |
-0.035144 |
-0.338481 |
1.000000 |
-0.369226 |
0.083081 |
0.018443 |
-0.549500 |
Age |
0.036847 |
-0.077221 |
-0.369226 |
1.000000 |
-0.308247 |
-0.189119 |
0.096067 |
SibSp |
-0.057527 |
-0.035322 |
0.083081 |
-0.308247 |
1.000000 |
0.414838 |
0.159651 |
Parch |
-0.001652 |
0.081629 |
0.018443 |
-0.189119 |
0.414838 |
1.000000 |
0.216225 |
Fare |
0.012658 |
0.257307 |
-0.549500 |
0.096067 |
0.159651 |
0.216225 |
1.000000 |
Tracer une carte thermique (heatmap) avec sns.heatmap()
# Heatmap of the correlation table with default settings.
sns.heatmap(corr)
<matplotlib.axes._subplots.AxesSubplot at 0x7fcbc6df9850>
# Same heatmap with the 'coolwarm' colormap and annot=True.
sns.heatmap(corr, cmap='coolwarm', annot=True)
<matplotlib.axes._subplots.AxesSubplot at 0x7fcbcd0b3290>
sns.heatmap() vous aide à obtenir une vue d'ensemble de vos données
# Load the 'flights' dataset through seaborn, print its length, and show the first rows.
flights = sns.load_dataset('flights')
print(len(flights))
flights.head()
144
|
year |
month |
passengers |
0 |
1949 |
January |
112 |
1 |
1949 |
February |
118 |
2 |
1949 |
March |
132 |
3 |
1949 |
April |
129 |
4 |
1949 |
May |
121 |
# Build a pivot table: one row per month, one column per year,
# with the passengers values in the cells.
flights_pivot = flights.pivot_table(
    values='passengers',
    index='month',
    columns='year',
)
flights_pivot
year |
1949 |
1950 |
1951 |
1952 |
1953 |
1954 |
1955 |
1956 |
1957 |
1958 |
1959 |
1960 |
month |
|
|
|
|
|
|
|
|
|
|
|
|
January |
112 |
115 |
145 |
171 |
196 |
204 |
242 |
284 |
315 |
340 |
360 |
417 |
February |
118 |
126 |
150 |
180 |
196 |
188 |
233 |
277 |
301 |
318 |
342 |
391 |
March |
132 |
141 |
178 |
193 |
236 |
235 |
267 |
317 |
356 |
362 |
406 |
419 |
April |
129 |
135 |
163 |
181 |
235 |
227 |
269 |
313 |
348 |
348 |
396 |
461 |
May |
121 |
125 |
172 |
183 |
229 |
234 |
270 |
318 |
355 |
363 |
420 |
472 |
June |
135 |
149 |
178 |
218 |
243 |
264 |
315 |
374 |
422 |
435 |
472 |
535 |
July |
148 |
170 |
199 |
230 |
264 |
302 |
364 |
413 |
465 |
491 |
548 |
622 |
August |
148 |
170 |
199 |
242 |
272 |
293 |
347 |
405 |
467 |
505 |
559 |
606 |
September |
136 |
158 |
184 |
209 |
237 |
259 |
312 |
355 |
404 |
404 |
463 |
508 |
October |
119 |
133 |
162 |
191 |
211 |
229 |
274 |
306 |
347 |
359 |
407 |
461 |
November |
104 |
114 |
146 |
172 |
180 |
203 |
237 |
271 |
305 |
310 |
362 |
390 |
December |
118 |
140 |
166 |
194 |
201 |
229 |
278 |
306 |
336 |
337 |
405 |
432 |
# Heatmap of the month-by-year pivot table.
sns.heatmap(flights_pivot)
<matplotlib.axes._subplots.AxesSubplot at 0x7fcbc5baabd0>
27e
Changer le style de base avec sns.set ()
Spécifiez le contexte d'utilisation avec l'argument context
import pandas as pd
import seaborn as sns
%matplotlib inline
# Load train.csv and keep only the rows that have an Age value.
df = pd.read_csv('train.csv').dropna(subset=['Age'])
# Use the 'poster' context for the plots that follow.
sns.set(context='poster')
sns.distplot(df['Age'])
<matplotlib.axes._subplots.AxesSubplot at 0x7fcbc568f690>
Spécifiez le style de l'ensemble du graphique avec l'argument style
# Switch to the 'whitegrid' style and redraw the Age distribution.
sns.set_style(style='whitegrid') # change the background color
sns.distplot(df['Age'])
<matplotlib.axes._subplots.AxesSubplot at 0x7fcbc563d850>
Spécifiez la couleur avec l'argument de palette
# Select the 'bright' palette, then draw Age per Pclass as a violin plot.
sns.set(palette='bright')
sns.violinplot(x='Pclass', y='Age', data=df)
<matplotlib.axes._subplots.AxesSubplot at 0x7fcbc5472850>
Supprimez les axes et les cadres avec sns.despine()
# Bright palette combined with the 'ticks' style
# (presumably adds tick marks on the axes — confirm against the rendered plot).
sns.set(style='ticks', palette='bright')
sns.violinplot(x='Pclass', y='Age', data=df)
<matplotlib.axes._subplots.AxesSubplot at 0x7fcbc53ed810>
# Re-apply the settings with only the bright palette specified.
sns.set(palette='bright')
sns.violinplot(x='Pclass', y='Age', data=df)
# Call despine() with its defaults after the plot has been drawn.
sns.despine()
Comme avec matplotlib, vous pouvez effectuer diverses opérations via le module plt.
import matplotlib.pyplot as plt
# Create a figure with figsize=(10, 5) before plotting the Age distribution.
plt.figure(figsize=(10, 5))
sns.distplot(df['Age'])
<matplotlib.axes._subplots.AxesSubplot at 0x7fcbc52d8750>
# Plot the Age distribution, then save the figure to seaborn_sample.png.
sns.distplot(df['Age'])
plt.savefig('seaborn_sample.png')