Last night, in this [Introduction to Data Scientists] series, I summarized the basics of Pandas as part of the fundamentals of scientific computing, data processing, and graph-drawing libraries. Tonight I will summarize the basics of Matplotlib, the graph-drawing library. I will supplement the explanations in the book. 【Caution】 Having read the "Data Scientist Training Course at the University of Tokyo", I would like to summarize the points that I had doubts about or that I found useful. The outline therefore follows the book closely, but please read this article as my own notes, independent of the content of the book.
Most of the time I only use `plt`, which is imported on the third line. NumPy is imported as well because it is used below.
import matplotlib as mpl  # fixed: module name was misspelled "matplotlb", which raises ModuleNotFoundError
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

# Scatter-style plot of 30 noisy samples of sin(x).
np.random.seed(0)  # fixed seed so the figure is reproducible
x = np.random.randn(30)
y = np.sin(x) + np.random.randn(30)
plt.figure(figsize=(20, 6))
plt.plot(x, y, 'o')  # format string is the letter "o" (circle markers, no line)
# plt.scatter(x, y)  # alternative call kept from the original for comparison
plt.title('Title name')
plt.xlabel('X')
plt.ylabel('Y')
plt.grid(True)
plt.show()
The number of data points is increased tenfold (in the 300-point examples further below), and the random component is halved so that the sine-curve fluctuation along the y-axis can be seen. The following connects the plotted points with a polyline instead of drawing 'o' markers.
# Line plot: sin(x) sampled at the integers 0..29 plus half-amplitude noise.
np.random.seed(0)
x = np.arange(30)
noise = 0.5 * np.random.randn(x.size)  # random component scaled by one half
y = np.sin(x) + noise

plt.figure(figsize=(20, 6))
# No 'o' format string here, so the points are joined by a line.
plt.plot(x, y, label='Label')
plt.title('Title name')
plt.xlabel('X')
plt.ylabel('Y')
plt.grid(True)
plt.legend()  # uses the label= given to plt.plot above
plt.show()
Keep the 'o' markers on the plot as well. To do so, change the arguments of plt.plot as follows.
Designation | Remarks |
---|---|
color='red' | Color specification: blue, green, black, yellow, pink,... |
lw=1 | Line width,1,2,3,...,10,.. |
linestyle='-' | Line style: '-', '--', '-.', ':', 'None', ' ', '', 'solid', 'dashed', 'dashdot', 'dotted',.. |
marker='o' | Plot shape:'.', ... |
matplotlib: Specifying Colors; Comparison table of how colors actually look
matplotlib.markers plot symbol and actual marker comparison table
# Form of the call with colour, line width, line style and marker spelled out.
plt.plot(x, y, color='red', linewidth=1, linestyle='-', marker='o', label='Label')

# 300 samples of sin(pi * x / 30) with half-amplitude Gaussian noise.
np.random.seed(0)
x = np.arange(300)
phase = np.pi * x / 30
y = np.sin(phase) + 0.5 * np.random.randn(300)

plt.figure(figsize=(20, 6))
# Variations to try: linestyle='--', color='red'.
plt.plot(x, y, color='blue', linewidth=1, linestyle='-', marker='o', label='Label')
plt.title('Title name')
plt.xlabel('X')
plt.ylabel('Y')
plt.grid(True)
plt.legend()
plt.show()
# Random walk: running total of 300 standard-normal steps.
np.random.seed(0)
x = np.arange(300)
steps = np.random.randn(300)
y = np.cumsum(steps)  # cumulative sum replaces the sine data used before

plt.figure(figsize=(20, 6))
plt.plot(x, y, color='blue', linewidth=1, linestyle='-', marker='o', label='Label')
plt.title('Title name')
plt.xlabel('X')
plt.ylabel('Y')
plt.grid(True)
plt.legend()
plt.show()
This is a rather mundane detail, but as a supplement, here are several ways to generate the x array. The result of each print is shown in the comment below it.
# Compare ways of building the x array. The comment under each print is the
# output recorded by the author.
# NOTE(review): the integer element type (int32 here) presumably depends on
# the platform / NumPy version — confirm on your own machine.
x = np.arange(300)
print(type(x), len(x), type(x[0]), x)
# <class 'numpy.ndarray'> 300 <class 'numpy.int32'> [ 0 1 2...299]

x = np.linspace(0, 299, 300)
# fixed: type(x[0]) was missing here although the recorded output shows it
print(type(x), len(x), type(x[0]), x)
# <class 'numpy.ndarray'> 300 <class 'numpy.float64'> [ 0. 1. 2. ...299.]

x = np.arange(0, 300, 1)
print(type(x), len(x), type(x[0]), x)
# <class 'numpy.ndarray'> 300 <class 'numpy.int32'> [ 0 1 2 ...299]

x = np.arange(0., 300., 1.)
print(type(x), len(x), type(x[0]), x)
# <class 'numpy.ndarray'> 300 <class 'numpy.float64'> [ 0. 1. 2. ...299.]
# Two panels stacked vertically: subplot(rows, cols, index) selects a panel.
grid_pts = np.linspace(-10, 10, 100)
plt.figure(figsize=(20, 6))

top = plt.subplot(2, 1, 1)
top.plot(grid_pts, np.sin(grid_pts))

bottom = plt.subplot(2, 1, 2)
bottom.plot(grid_pts, np.sin(2 * grid_pts))
bottom.grid(True)  # grid is switched on for the lower panel only

plt.show()
A 2×2 grid of four panels can be drawn as follows.
# 2x2 grid written out panel by panel; subplot(22k) is shorthand for
# subplot(2, 2, k).
x = np.linspace(-10, 10, 100)
plt.figure(figsize=(20, 6))

plt.subplot(221)
plt.plot(x, np.sin(x))

plt.subplot(222)
plt.plot(x, np.sin(2 * x))

plt.subplot(223)
plt.plot(x, np.sin(3 * x))

plt.subplot(224)
plt.plot(x, np.sin(4 * x))
plt.grid(True)  # applies to the current (fourth) panel only

plt.show()
It can also be drawn as follows.
# Same 2x2 figure as the panel-by-panel version, generated with a loop:
# panel n shows sin(n * x).
plt.figure(figsize=(20, 6))
x = np.linspace(-10, 10, 100)
for n in range(1, 5):
    # fixed: the loop body had lost its indentation and did not parse
    plt.subplot(2, 2, n)
    plt.plot(x, np.sin(n * x))
# NOTE(review): grid is applied after the loop, i.e. to the last panel only,
# to match the panel-by-panel version; indent it into the loop if every panel
# should have a grid.
plt.grid(True)
plt.show()
More commonly, the figure and its axes are created together with plt.subplots, as follows. The content of the graphs is somewhat advanced, but I think the layout is easy to understand at a glance.
import numpy as np
import matplotlib.pyplot as plt

# Four panels demonstrating logarithmic axes.
# Requires Matplotlib >= 3.3: the old keywords basex/basey and nonposx/nonposy
# used originally have been removed in favour of base= and nonpositive=.

# Data for plotting
t = np.arange(0.01, 20.0, 0.01)

# Create figure
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(8, 6))

# log y axis
ax1.semilogy(t, np.exp(-t / 5.0))
ax1.set(title='semilogy')
ax1.grid()

# log x axis
ax2.semilogx(t, np.sin(2 * np.pi * t))
ax2.set(title='semilogx')
ax2.grid()

# log x and y axis; base 2 is set on the x axis only, because passing base=
# to loglog() would change both axes
ax3.loglog(t, 20 * np.exp(-t / 10.0))
ax3.set_xscale('log', base=2)
ax3.set(title='loglog base 2 on x')
ax3.grid()

# With errorbars: clip non-positive values
# Use new data for plotting
x = 10.0**np.linspace(0.0, 2.0, 20)
y = x**2.0

ax4.set_xscale("log", nonpositive='clip')
ax4.set_yscale("log", nonpositive='clip')
ax4.set(title='Errorbars go negative')
ax4.errorbar(x, y, xerr=0.1 * x, yerr=5.0 + 0.75 * y)
# ylim must be set after errorbar to allow errorbar to autoscale limits
ax4.set_ylim(bottom=0.1)

fig.tight_layout()
plt.show()
Display the graph of the following function, to which Newton's method is applied. In addition, plot the solution obtained by Newton's method on the same graph.
f(x) = x^2 + 2x + 1
def my_function(x):
    """Return f(x) = x**2 + 2*x + 1 for a scalar or NumPy array *x*."""
    # fixed: the return statement had lost its indentation (IndentationError)
    return x**2 + 2 * x + 1
from scipy.optimize import newton

# Search for a zero of my_function, starting the iteration from 0.
x0 = newton(my_function, 0)

xs = np.arange(-10, 10)
plt.figure(figsize=(8, 6))
plt.plot(xs, my_function(xs))

# Mark the solution on the x-axis: a large red disc with a small black dot
# drawn on top of it.
for colour, size in (('red', 20), ('black', 5)):
    plt.plot(x0, 0, colour, marker='o', markersize=size)

plt.grid(True)
plt.show()
I also tried the following function.
def my_function(x):
    """Return f(x) = x**3 + 2*x + 1 for a scalar or NumPy array *x*."""
    # fixed: the return statement had lost its indentation (IndentationError)
    return x**3 + 2 * x + 1
The iteration looks unstable, but a solution is still found. Here the marker plot was given a little extra styling: the Matplotlib "Marker reference" describes the attributes of a marker, and the following example from it was used as a reference.
# Keyword arguments for a marker-styled plot call: dotted light-grey line with
# blue-filled, blue-edged markers of size 10.
marker_style = {
    'linestyle': ':',
    'color': '0.8',
    'markersize': 10,
    'markerfacecolor': "tab:blue",
    'markeredgecolor': "tab:blue",
}
# Histogram of 10**5 normal samples scaled by 10 and shifted by 50,
# counted in 60 bins over the range 20..80.
np.random.seed(0)
plt.figure(figsize=(8, 6))
n_samples = 10 ** 5
y = np.random.randn(n_samples) * 10 + 50
plt.hist(y, bins=60, range=(20, 80))
plt.grid(True)
plt.show()
# sin and cos on one set of axes, told apart by colour, line style and legend.
x = np.arange(-10, 10, 0.1)
plt.figure(figsize=(8, 6))

curves = (
    (np.sin, 'red', '-', 'sin'),
    (np.cos, 'blue', '--', 'cos'),
)
for func, colour, dash, name in curves:
    plt.plot(x, func(x), colour, marker='o', linestyle=dash, label=name)

plt.legend()
plt.grid(True)
plt.show()
plt.close()
# Histograms of uniform random numbers on [0, 1) for two sample sizes,
# one per row, each with 100 bins.
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(8, 6))
np.random.seed(0)

panels = (
    (ax1, 100000, 'y100000'),
    (ax2, 10000, 'y10000'),
)
for axis, n_draws, y_label in panels:
    samples = np.random.uniform(0.0, 1.0, n_draws)
    axis.hist(samples, bins=100, range=(0.0, 1.0))
    axis.set_ylabel(y_label)

plt.grid(True)  # acts on the current axes only
plt.show()
- Use two sets of uniform random numbers on [0, 1] to estimate π from the fraction of points that fall inside the quarter circle relative to the enclosing square. ・ Graph
>python pipi.py
pipi= 3.6 9 10
pipi= 3.4 17 20
pipi= 3.466666666666667 26 30
pipi= 3.3 33 40
pipi= 3.44 43 50
pipi= 3.3333333333333335 50 60
pipi= 3.3714285714285714 59 70
pipi= 3.35 67 80
pipi= 3.422222222222222 77 90
pipi= 3.4 85 100
pipi= 3.3 165 200
pipi= 3.26 326 400
pipi= 3.1266666666666665 469 600
pipi= 3.12 624 800
pipi= 3.104 776 1000
pipi= 3.024 1512 2000
pipi= 3.036 2277 3000
pipi= 3.054 3054 4000
pipi= 3.0712 3839 5000
pipi= 3.082666666666667 4624 6000
pipi= 3.0908571428571427 5409 7000
pipi= 3.0985 6197 8000
pipi= 3.104 6984 9000
pipi= 3.1068 7767 10000
pipi= 3.1224 15612 20000
pipi= 3.126 23445 30000
pipi= 3.1283 31283 40000
pipi= 3.13176 39147 50000
pipi= 3.138 47070 60000
pipi= 3.1370857142857145 54899 70000
pipi= 3.13575 62715 80000
pipi= 3.1347555555555555 70532 90000
pipi= 3.13364 78341 100000
At best, the result has four significant figures (down to the third decimal place), and the error already shows up at that digit.
import math
import os

import numpy as np
import matplotlib.pyplot as plt

# Monte Carlo estimate of pi: draw points uniformly in the unit square and
# count the fraction that falls inside the quarter circle of radius 1; four
# times that fraction approaches pi.
# Upper panel (ax1): the points, green inside / red outside the circle.
# Lower panel (ax2): the running estimate 4 * count / s1.
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(8, 16))
np.random.seed(0)
s = 10000  # number of random points
x = np.random.uniform(0.0, 1.0, s)
y = np.random.uniform(0.0, 1.0, s)

# Quarter-circle outline (sin t, cos t) for t in [0, pi/2)
x1 = np.arange(0, np.pi / 2, 0.01)
y1 = np.sin(x1)
y2 = np.cos(x1)
ax1.plot(y1, y2, lw=3)
plt.grid(True)

# fixed: savefig fails if the output directory does not exist
os.makedirs("./fig", exist_ok=True)

count = 0  # points inside the quarter circle so far
s1 = 0     # points processed so far
for i in range(s):
    s1 += 1
    # fixed: np.math was an alias of the stdlib math module and was removed
    # in NumPy 2.0; call math.hypot directly
    if math.hypot(x[i], y[i]) <= 1:
        count += 1
        ax1.plot(x[i], y[i], color='green', marker='o', markersize=3)
    else:
        ax1.plot(x[i], y[i], color='red', marker='o', markersize=3)
    rate = count / s1
    ax2.plot(s1, 4 * rate, color='blue', marker='o', linestyle='-', markersize=3)
    # plt.pause(0.1)  # uncomment to watch the figure update interactively
    if i % 500 == 499:
        # Report and save a snapshot every 500 points.
        # NOTE(review): the original indentation was lost; savefig is assumed
        # to belong to this branch rather than to every iteration.
        print('pipi=', 4 * rate, count, s1)
        # fixed: the file name had a trailing space after ".png"
        plt.savefig("./fig/pipi{}.png".format(i))
#The output has been changed but omitted
Although it is still far from converged, I ran it up to 100,000 samples. You can see the estimate gradually approaching 3.1415...
・ Organized the basics of how to use Matplotlib ・ For dividing a figure into several graphs, the commonly used approaches were summarized ・ plt.savefig was used in the general problems ・ π was calculated by the Monte Carlo method and visualized in a graph.
・ This book will be used everywhere in the future.
I think it is best to look at as many concrete examples as possible, so I link to them below: the Matplotlib Gallery. The following tutorials use simple examples and are organized by difficulty level: Matplotlib Tutorials.
Recommended Posts