matplotlib
import matplotlib.pyplot as plt

# Scatter plot of two columns: column1 on the horizontal axis,
# column2 on the vertical axis.
plt.scatter(x=df["column1"], y=df["column2"])
# Label the horizontal and vertical axes.
plt.xlabel("column1")
plt.ylabel("column2")
plt.show()
# Correlation coefficient between column1 and column2
# (Pearson is the pandas default, spelled out here for clarity).
print(df[["column1", "column2"]].corr(method="pearson"))
seaborn
import seaborn as sns

# Basic seaborn scatter plot of column1 against column2.
sns.scatterplot(
    data=df,
    x="column1",
    y="column2",
)
# Correlation coefficient between column1 and column2.
print(df[["column1", "column2"]].corr(method="pearson"))
# Scatter plot grouped by the variable "y".
#   hue="y"   -> groups are distinguished by colour
#   style="y" -> groups are distinguished by marker shape
# When many points overlap, adjust their opacity with alpha (0 to 1).
sns.scatterplot(
    data=df,
    x="column1",
    y="column2",
    hue="y",
)
# Correlation coefficient between column1 and column2.
print(df[["column1", "column2"]].corr(method="pearson"))
import seaborn as sns

# Scatter plot where marker size encodes a third variable.
#   size  -> a quantitative variable that drives the marker size
#   sizes -> the (min, max) range of marker sizes to use
#   alpha -> opacity adjustment, 0 to 1
sns.scatterplot(
    data=df,
    x="column1",
    y="column2",
    hue="y",
    size="column3",
    sizes=(10, 200),
)
# Correlation coefficient between column1 and column2.
print(df[["column1", "column2"]].corr(method="pearson"))
# Same plot as before, but keep the returned Axes so the legend can be moved.
ax = sns.scatterplot(
    data=df,
    x="column1",
    y="column2",
    hue="y",
    size="column3",
    sizes=(10, 200),
)
# Anchor the legend's upper-left corner at axes coordinates (1, 1),
# i.e. just outside the top-right corner of the plotting area.
ax.legend(bbox_to_anchor=(1, 1), loc="upper left")
# Correlation coefficient between column1 and column2.
print(df[["column1", "column2"]].corr(method="pearson"))
plotly
# Requires plotly: pip install plotly
import plotly.express as px

# Interactive scatter plot: colour by "y", marker size by column3
# (size_max caps the largest marker size).
fig = px.scatter(
    df,
    x="column1",
    y="column2",
    color="y",
    size="column3",
    size_max=30,
)
fig.show()
import seaborn as sns

# Joint plot: scatter plot of column1 vs column2 with marginal histograms.
#   marginal_kws={"bins": N} -> number of bins in the marginal histograms
#   color                    -> colour of the plot
#   kind="hex"               -> hexagonal-bin display instead of points;
#                               the shading represents the density of points
# NOTE: the original author observed that hue did not seem to be accepted as
# an argument here; this may depend on the seaborn version -- verify.
sns.jointplot(
    data=df,
    x="column1",
    y="column2",
    marginal_kws={"bins": 10},
)
# Correlation coefficient between column1 and column2.
print(df[["column1", "column2"]].corr(method="pearson"))
Display the relationships between multiple pairs of items at once
matplotlib
import matplotlib.pyplot as plt

# Draw a scatter-plot matrix for the four selected columns.
pd.plotting.scatter_matrix(
    frame=df[["column1", "column2", "column3", "column4"]],
)
# Tidy up the spacing between the subplots before displaying.
plt.tight_layout()
plt.show()
import seaborn as sns

# Pair plot of the four columns, coloured by the qualitative variable "y",
# with histograms on the diagonal.
sns.pairplot(
    data=df[["column1", "column2", "column3", "column4", "y"]],
    hue="y",
    diag_kind="hist",
)
plt.show()
# Passing hue again lets you see the distribution per qualitative category.
# Useful pairplot options:
#   palette={'yes': 'red', 'no': 'blue'}  -> colour for each category
#   markers='+' / markers=['+', 's', 'd'] -> marker(s) used for the points
#   diag_kind="hist" / diag_kind="kde"    -> histogram or kernel density
#                                            estimate on the diagonal
#   alpha=0~1                             -> opacity of the points
#   kind='reg'                            -> draw a regression line on each
#                                            pairwise scatter plot
#   height=2                              -> size of each output graph
#   x_vars=['column1', 'column2'], y_vars=['column1', 'column2']
#                                         -> columns to put on each axis
# The column given to hue must be present in the DataFrame passed in.
# Leaving it out of the selection raises an error, e.g.:
#   sns.pairplot(df[['column1','balance','day','duration']],hue="y")
# The selection itself is an ordinary DataFrame:
#   type(df[['column1','column2','column3','column4',"y"]])  # pandas.core.frame.DataFrame
# Save the pair plot as a PNG file:
#   sns.pairplot(df[['column1','column2','column3','column4',"y"]],hue="y").savefig('file.png')
# Correlation coefficient matrix (Pearson).
# "y" is used as a qualitative hue variable in the plots of this file. If it
# is non-numeric, DataFrame.corr() in pandas >= 2.0 raises instead of silently
# dropping it, so keep only the numeric/boolean columns before correlating.
# On older pandas this yields the same matrix as before.
corr = (
    df[["column1", "column2", "column3", "column4", "y"]]
    .select_dtypes(include=["number", "bool"])
    .corr(method="pearson")
)
print(corr)
# Build a heatmap from the correlation coefficient matrix.
# annot=True writes each coefficient inside its cell; annot=False hides them.
sns.heatmap(corr, cmap="coolwarm", annot=True)
plt.show()
import seaborn as sns

# Categorical plot: column1 for each category1 level, coloured by "y",
# drawn semi-transparent (alpha=0.5).
sns.catplot(
    data=df,
    x="category1",
    y="column1",
    hue="y",
    alpha=0.5,
)
Recommended Posts