24th
Seaborn
import seaborn as sns
%matplotlib inline
import pandas as pd
df = pd.read_csv('train.csv')
df = df.dropna(subset=['Age'])
sns.distplot(df['Age'])
<matplotlib.axes._subplots.AxesSubplot at 0x7fcbcdd7e3d0>
data:image/s3,"s3://crabby-images/f4228/f4228687db1e8fd68a10a4945c590a59d6000daa" alt="png"
sns.set()
sns.distplot(df['Age'],bins=50)
<matplotlib.axes._subplots.AxesSubplot at 0x7fcbcdd05850>
data:image/s3,"s3://crabby-images/b3a66/b3a66a19e70f41ee871f72b7e7dd1d497c25d15d" alt="png"
View the joint distribution of two variables with sns.jointplot()
sns.jointplot(x='Age', y='Fare', data=df)
<seaborn.axisgrid.JointGrid at 0x7fcbcdbe31d0>
data:image/s3,"s3://crabby-images/22213/22213506b7929698cdee5c0cf870514c1559dfca" alt="png"
sns.jointplot(x='Age', y='Fare', data=df, kind='hex')
<seaborn.axisgrid.JointGrid at 0x7fcbcdb2b890>
data:image/s3,"s3://crabby-images/cdc10/cdc10c60696fbdde0512f519c0ebd300427996a1" alt="png"
(Important) Display the pairwise distributions of multiple columns in one shot with sns.pairplot()
sns.pairplot(df[['Age', 'Fare', 'Pclass', 'Survived']], hue='Survived', kind='scatter', plot_kws={'alpha': 0.5})
/opt/anaconda3/lib/python3.7/site-packages/statsmodels/nonparametric/kde.py:487: RuntimeWarning: invalid value encountered in true_divide
binned = fast_linbin(X, a, b, gridsize) / (delta * nobs)
/opt/anaconda3/lib/python3.7/site-packages/statsmodels/nonparametric/kdetools.py:34: RuntimeWarning: invalid value encountered in double_scalars
FAC1 = 2*(np.pi*bw/RANGE)**2
<seaborn.axisgrid.PairGrid at 0x7fcbcda34990>
data:image/s3,"s3://crabby-images/2bd81/2bd81db2c6aa18065035804ee9aa4c622d9fbc50" alt="png"
25th
sns.barplot(): draw a bar chart
import numpy as np
import pandas as pd
import seaborn as sns
%matplotlib inline
df = pd.read_csv('train.csv')
df.head()
|
PassengerId |
Survived |
Pclass |
Name |
Sex |
Age |
SibSp |
Parch |
Ticket |
Fare |
Cabin |
Embarked |
0 |
1 |
0 |
3 |
Braund, Mr. Owen Harris |
male |
22.0 |
1 |
0 |
A/5 21171 |
7.2500 |
NaN |
S |
1 |
2 |
1 |
1 |
Cumings, Mrs. John Bradley (Florence Briggs Th... |
female |
38.0 |
1 |
0 |
PC 17599 |
71.2833 |
C85 |
C |
2 |
3 |
1 |
3 |
Heikkinen, Miss. Laina |
female |
26.0 |
0 |
0 |
STON/O2. 3101282 |
7.9250 |
NaN |
S |
3 |
4 |
1 |
1 |
Futrelle, Mrs. Jacques Heath (Lily May Peel) |
female |
35.0 |
1 |
0 |
113803 |
53.1000 |
C123 |
S |
4 |
5 |
0 |
3 |
Allen, Mr. William Henry |
male |
35.0 |
0 |
0 |
373450 |
8.0500 |
NaN |
S |
sns.barplot(x='Survived', y='Age', data=df) #Average value
<matplotlib.axes._subplots.AxesSubplot at 0x7fcbcdd7e390>
data:image/s3,"s3://crabby-images/6e2e0/6e2e025f234ab7a89c86fdcb96e27e7f4c97821e" alt="png"
sns.barplot(x='Survived', y='Age', data=df, estimator=np.median) #Median
<matplotlib.axes._subplots.AxesSubplot at 0x7fcbcce3a190>
data:image/s3,"s3://crabby-images/4f282/4f282fdf73a45336e4aa66191469f1c21345e7f2" alt="png"
sns.countplot(): compare the number of records in each category
sns.countplot(x='Sex', data=df, hue='Survived')
<matplotlib.axes._subplots.AxesSubplot at 0x7fcbccfb0c10>
data:image/s3,"s3://crabby-images/2a469/2a469fd2685583ff2600594d7b19bba784f1949b" alt="png"
sns.boxplot(): compare the distribution of values by category
sns.boxplot(x='Pclass', y='Age', data=df)
<matplotlib.axes._subplots.AxesSubplot at 0x7fcbccd82750>
data:image/s3,"s3://crabby-images/4a30e/4a30e90a56d273cebd4b1280104800f405ce8dca" alt="png"
sns.boxplot(x='Pclass', y='Age', data=df, hue='Survived')
<matplotlib.axes._subplots.AxesSubplot at 0x7fcbcc2a4ed0>
data:image/s3,"s3://crabby-images/45460/454604bbc1bd688e6b25fd682eaf750f1e6770fb" alt="png"
sns.violinplot(): visualize the shape of the distribution by category
sns.violinplot(x='Pclass', y='Age', data=df)
<matplotlib.axes._subplots.AxesSubplot at 0x7fcbcc1e3ad0>
data:image/s3,"s3://crabby-images/a7dad/a7dad33bf74a2585ce86cb572ed72cdef3a826db" alt="png"
sns.violinplot(x='Pclass', y='Age', data=df, hue=('Survived'))
<matplotlib.axes._subplots.AxesSubplot at 0x7fcbcc107210>
data:image/s3,"s3://crabby-images/30c05/30c05ee0f655dd6e4fa9c48e1e9f284a4209e7b3" alt="png"
sns.swarmplot(): plot every data point so you can see the actual distribution
sns.swarmplot(x='Pclass', y='Age', data=df)
<matplotlib.axes._subplots.AxesSubplot at 0x7fcbcc19a710>
data:image/s3,"s3://crabby-images/6045c/6045c6cbfa8953695555ee17af71b4c8b5369e5b" alt="png"
sns.swarmplot(x='Pclass', y='Age', data=df, size=4, hue='Survived')
<matplotlib.axes._subplots.AxesSubplot at 0x7fcbc6e9fe50>
data:image/s3,"s3://crabby-images/1ced9/1ced939f34e3d4d5880b66775ae942c4efbbc984" alt="png"
26th
Draw a Heatmap
Compute the correlation matrix with df.corr()
import pandas as pd
df = pd.read_csv('train.csv')
# Correlate only the numeric columns. The Titanic data also contains string
# columns (Name, Sex, Ticket, Cabin, Embarked); pandas >= 2.0 no longer drops
# them silently in DataFrame.corr() and raises ValueError instead.
# select_dtypes works on both old and new pandas versions.
corr = df.select_dtypes(include='number').corr()
corr
|
PassengerId |
Survived |
Pclass |
Age |
SibSp |
Parch |
Fare |
PassengerId |
1.000000 |
-0.005007 |
-0.035144 |
0.036847 |
-0.057527 |
-0.001652 |
0.012658 |
Survived |
-0.005007 |
1.000000 |
-0.338481 |
-0.077221 |
-0.035322 |
0.081629 |
0.257307 |
Pclass |
-0.035144 |
-0.338481 |
1.000000 |
-0.369226 |
0.083081 |
0.018443 |
-0.549500 |
Age |
0.036847 |
-0.077221 |
-0.369226 |
1.000000 |
-0.308247 |
-0.189119 |
0.096067 |
SibSp |
-0.057527 |
-0.035322 |
0.083081 |
-0.308247 |
1.000000 |
0.414838 |
0.159651 |
Parch |
-0.001652 |
0.081629 |
0.018443 |
-0.189119 |
0.414838 |
1.000000 |
0.216225 |
Fare |
0.012658 |
0.257307 |
-0.549500 |
0.096067 |
0.159651 |
0.216225 |
1.000000 |
Plot the heatmap with sns.heatmap()
sns.heatmap(corr)
<matplotlib.axes._subplots.AxesSubplot at 0x7fcbc6df9850>
data:image/s3,"s3://crabby-images/0d830/0d8300378afee5dc570e8da26f4af7d15f411fdd" alt="png"
sns.heatmap(corr, cmap='coolwarm', annot=True)
<matplotlib.axes._subplots.AxesSubplot at 0x7fcbcd0b3290>
data:image/s3,"s3://crabby-images/7fd0e/7fd0e2237725cbe92453c5487aa15a302c9e8f57" alt="png"
sns.heatmap() helps you get a bird's-eye view of your data
flights = sns.load_dataset('flights')
print(len(flights))
flights.head()
144
|
year |
month |
passengers |
0 |
1949 |
January |
112 |
1 |
1949 |
February |
118 |
2 |
1949 |
March |
132 |
3 |
1949 |
April |
129 |
4 |
1949 |
May |
121 |
# Create a pivot table
flights_pivot = flights.pivot_table(index='month', columns='year', values='passengers')
flights_pivot
year |
1949 |
1950 |
1951 |
1952 |
1953 |
1954 |
1955 |
1956 |
1957 |
1958 |
1959 |
1960 |
month |
|
|
|
|
|
|
|
|
|
|
|
|
January |
112 |
115 |
145 |
171 |
196 |
204 |
242 |
284 |
315 |
340 |
360 |
417 |
February |
118 |
126 |
150 |
180 |
196 |
188 |
233 |
277 |
301 |
318 |
342 |
391 |
March |
132 |
141 |
178 |
193 |
236 |
235 |
267 |
317 |
356 |
362 |
406 |
419 |
April |
129 |
135 |
163 |
181 |
235 |
227 |
269 |
313 |
348 |
348 |
396 |
461 |
May |
121 |
125 |
172 |
183 |
229 |
234 |
270 |
318 |
355 |
363 |
420 |
472 |
June |
135 |
149 |
178 |
218 |
243 |
264 |
315 |
374 |
422 |
435 |
472 |
535 |
July |
148 |
170 |
199 |
230 |
264 |
302 |
364 |
413 |
465 |
491 |
548 |
622 |
August |
148 |
170 |
199 |
242 |
272 |
293 |
347 |
405 |
467 |
505 |
559 |
606 |
September |
136 |
158 |
184 |
209 |
237 |
259 |
312 |
355 |
404 |
404 |
463 |
508 |
October |
119 |
133 |
162 |
191 |
211 |
229 |
274 |
306 |
347 |
359 |
407 |
461 |
November |
104 |
114 |
146 |
172 |
180 |
203 |
237 |
271 |
305 |
310 |
362 |
390 |
December |
118 |
140 |
166 |
194 |
201 |
229 |
278 |
306 |
336 |
337 |
405 |
432 |
sns.heatmap(flights_pivot)
<matplotlib.axes._subplots.AxesSubplot at 0x7fcbc5baabd0>
data:image/s3,"s3://crabby-images/e6110/e6110ad41c29de9ddbccd7aef0ad065f9e5786aa" alt="png"
27th
Change the basic style with sns.set()
Specify the intended use of the figure (e.g. paper, notebook, talk, poster) with the context argument
import pandas as pd
import seaborn as sns
%matplotlib inline
df = pd.read_csv('train.csv')
sns.set(context=('poster'))
df = df.dropna(subset=['Age'])
sns.distplot(df['Age'])
<matplotlib.axes._subplots.AxesSubplot at 0x7fcbc568f690>
data:image/s3,"s3://crabby-images/c142c/c142cf40159a5409dd9e9da210228d4aa466ee16" alt="png"
Specify the style of the entire graph with the style argument
sns.set_style(style='whitegrid') #Change background color
sns.distplot(df['Age'])
<matplotlib.axes._subplots.AxesSubplot at 0x7fcbc563d850>
data:image/s3,"s3://crabby-images/fcdf6/fcdf66f8209dfba83bb83a0261f7f1932461e2c6" alt="png"
Specify the color palette with the palette argument
sns.set(palette='bright')
sns.violinplot(x='Pclass', y='Age', data=df)
<matplotlib.axes._subplots.AxesSubplot at 0x7fcbc5472850>
data:image/s3,"s3://crabby-images/79d6c/79d6c8f4981c6956bf06fd6518c3fb8023c4930f" alt="png"
Remove axis spines (the plot frame) with sns.despine()
sns.set(palette='bright' ,style='ticks') # style='ticks' draws tick marks on the axes
sns.violinplot(x='Pclass', y='Age', data=df)
<matplotlib.axes._subplots.AxesSubplot at 0x7fcbc53ed810>
data:image/s3,"s3://crabby-images/34f6f/34f6f2ba8d4951d5aef08ff4c9908656155d2279" alt="png"
sns.set(palette='bright' )
sns.violinplot(x='Pclass', y='Age', data=df)
sns.despine()
data:image/s3,"s3://crabby-images/f7654/f7654d89ec7a8777f9287209adbf7044b6b985a3" alt="png"
As with plain matplotlib, you can customize seaborn plots in many ways through the plt module
import matplotlib.pyplot as plt
plt.figure(figsize=(10, 5))
sns.distplot(df['Age'])
<matplotlib.axes._subplots.AxesSubplot at 0x7fcbc52d8750>
data:image/s3,"s3://crabby-images/b0c93/b0c93d857bf1e937beb70e46e2570c99c30054cd" alt="png"
sns.distplot(df['Age'])
plt.savefig('seaborn_sample.png')
data:image/s3,"s3://crabby-images/8bf39/8bf39fc43148a047486c813f90652e2d26ee2058" alt="png"