# Draw one scatter plot per target correlation coefficient, side by side,
# each from 5000 samples of a 2-D normal distribution.
lst = [-1, -0.7, -0.3, 0, 0.3, 0.7, 1]
fig, ax = plt.subplots(1, len(lst), figsize=(10 * len(lst), 10))
# The mean is the same for every panel, so build it once outside the loop.
mean = np.array([0, 0])
for idx, corrcoef in enumerate(lst):
    # Both variances are 1, so the off-diagonal covariance term is the
    # correlation coefficient itself.
    cov = np.array([[1, corrcoef], [corrcoef, 1]])
    x, y = np.random.multivariate_normal(mean, cov, 5000).T
    ax[idx].scatter(x, y, color='royalblue')
    ax[idx].set_title(f'corrcoef = {corrcoef:.2f}', size=50)
    # Hide ticks and tick labels: only the shape of the point cloud matters.
    ax[idx].tick_params(bottom=False, left=False, labelbottom=False, labelleft=False)
# Non-linear example: y is a cubic function of randomly drawn integer x values.
# The correlation coefficient of the sample is shown in the plot title.
x = np.random.randint(low=1, high=1000, size=1000)
cubic_term = (x - 400) ** 3
quadratic_term = 100 * (x - 200) ** 2
y = (cubic_term - quadratic_term + 100000000) / 1000000

# np.corrcoef returns the 2x2 correlation matrix; entry [0, 1] is the
# coefficient between x and y.
corr_matrix = np.corrcoef(x, y)
corr_coef = corr_matrix[0, 1]

fig, ax = plt.subplots()
ax.scatter(x, y, color='royalblue')
title_text = f'corr={corr_coef:.3f}'
ax.set_title(title_text, size=18)
# Hide ticks and tick labels on both axes.
hidden_ticks = dict(bottom=False, left=False, labelbottom=False, labelleft=False)
ax.tick_params(**hidden_ticks)
# Non-linear example: y is a parabola in x centred on x = 500.
# The correlation coefficient of the sample is shown in the plot title.
x = np.random.randint(low=1, high=1000, size=100)
squared_offset = (x - 500) ** 2
y = squared_offset / 100 + 300

# Entry [0, 1] of the correlation matrix is the x-y coefficient.
corr_matrix = np.corrcoef(x, y)
corr_coef = corr_matrix[0, 1]

fig, ax = plt.subplots()
ax.scatter(x, y, color='royalblue')
title_text = f'corr={corr_coef:.3f}'
ax.set_title(title_text, size=18)
# Hide ticks and tick labels on both axes.
hidden_ticks = dict(bottom=False, left=False, labelbottom=False, labelleft=False)
ax.tick_params(**hidden_ticks)
Exemple dans lequel le coefficient de corrélation est faible lorsque l'on considère l'ensemble des données (figure de gauche), alors que la corrélation est élevée lorsque l'on restreint la plage de données à « x = 900 » ou plus (figure de droite).
# Two-panel comparison of the correlation coefficient on a restricted x range
# (right panel) versus the same data plus three extra points at smaller x
# (left panel).
x = np.random.randint(900, 1000, 1000)
noise = np.random.randn(1000)
y = x + 10 * noise

# Same sample extended with three hand-picked points at x = 600, 700, 800.
x2 = list(x) + [600, 700, 800]
y2 = list(y) + [2000, 1800, 1500]

fig, ax = plt.subplots(1, 2, figsize=(10, 5))
# The right panel (index 1) is drawn first, then the left panel (index 0).
for panel, xs, ys in ((ax[1], x, y), (ax[0], x2, y2)):
    corr_coef = np.corrcoef(xs, ys)[0, 1]
    panel.scatter(xs, ys, color='royalblue')
    panel.set_title(f'corr={corr_coef:.3f}', size=18)
    # Hide ticks and tick labels on both axes.
    panel.tick_params(bottom=False, left=False, labelbottom=False, labelleft=False)
import numpy as np
import matplotlib.pyplot as plt
# Least-squares fit of a straight line to six sample points, plotted over
# x in [0, 8] together with the data and the fitted equation.
x = np.array([2.0, 3.5, 4.0, 4.5, 5.0, 5.5])
y = np.array([3.0, 3.2, 3.9, 5.2, 8.4, 10.5])
xp = np.linspace(0, 8, 100)
for val in range(1, 2):  # a single pass: only degree 1 is fitted
    fx = np.poly1d(np.polyfit(x, y, val))
    fig, ax = plt.subplots()
    ax.plot(xp, fx(xp), '-', color='blue')
    ax.scatter(x, y, color='deepskyblue', s=32)
    # poly1d keeps coefficients highest power first: coef[0] is the slope,
    # coef[1] the intercept. The '+' flag always prints the intercept's sign,
    # so a positive intercept cannot be misread as a product.
    ax.text(0.05, 0.8, s=f'y = {fx.coef[0]:.2f} x {fx.coef[1]:+.2f}', size='x-large', transform=ax.transAxes)
    # axhline is documented to take a scalar position, not a list.
    ax.axhline(0, color='black')
    ax.set_xlim(0, None)
    ax.set_ylim(-3, 14)
    ax.set_ylabel('Cost [JPY]')
    ax.set_xlabel('Explanatory variables')
import numpy as np
import matplotlib.pyplot as plt
# Same straight-line fit as a solid blue line inside the observed x range
# [2, 5.5], with its extrapolation on either side drawn as dashed red lines.
x = np.array([2.0, 3.5, 4.0, 4.5, 5.0, 5.5])
y = np.array([3.0, 3.2, 3.9, 5.2, 8.4, 10.5])
xp = np.linspace(2, 5.5, 100)    # range covered by the data
xp1 = np.linspace(0, 2, 100)     # extrapolation below the data
xp2 = np.linspace(5.5, 8, 100)   # extrapolation above the data
for val in range(1, 2):  # a single pass: only degree 1 is fitted
    fx = np.poly1d(np.polyfit(x, y, val))
    fig, ax = plt.subplots()
    ax.plot(xp, fx(xp), '-', color='blue')
    # No '-' format string here: it would define the line style a second
    # time and conflict with linestyle='dashed'.
    ax.plot(xp1, fx(xp1), color='red', linestyle='dashed')
    ax.plot(xp2, fx(xp2), color='red', linestyle='dashed')
    ax.scatter(x, y, color='deepskyblue', s=32)
    # poly1d keeps coefficients highest power first: coef[0] is the slope,
    # coef[1] the intercept. The '+' flag always prints the intercept's sign.
    ax.text(0.05, 0.8, s=f'y = {fx.coef[0]:.2f} x {fx.coef[1]:+.2f}', size='x-large', transform=ax.transAxes)
    # axhline / axvline are documented to take scalar positions, not lists.
    ax.axhline(0, color='black')
    # Dotted guides mark the smallest and largest observed x.
    ax.axvline(2, color='gray', linestyle='dotted')
    ax.axvline(5.5, color='gray', linestyle='dotted')
    ax.set_xlim(0, 8)
    ax.set_ylim(-3, 14)
    ax.set_ylabel('Cost [JPY]')
    ax.set_xlabel('Explanatory variables')
Si vous augmentez le degré du polynôme ou le nombre de variables explicatives, le modèle s'ajuste mieux aux données d'entraînement, mais la précision de la prédiction sur des données inconnues diminue (surapprentissage). Les figures montrent le cas où le degré est augmenté.
import numpy as np
import matplotlib.pyplot as plt
# Fit polynomials of degree 2 through 5 to the same six points and draw one
# figure per degree, each labelled with the degree used.
x = np.array([2.0, 3.5, 4.0, 4.5, 5.0, 5.5])
y = np.array([3.0, 3.2, 3.9, 5.2, 8.4, 10.5])
xp = np.linspace(0, 8, 100)
for val in range(2, 6):
    fx = np.poly1d(np.polyfit(x, y, val))
    fig, ax = plt.subplots()
    ax.plot(xp, fx(xp), '-', color='blue')
    ax.scatter(x, y, color='deepskyblue', s=32)
    # axhline is documented to take a scalar position, not a list.
    ax.axhline(0, color='black')
    ax.set_xlim(0, None)
    # Same y window for every degree so the figures can be compared directly.
    ax.set_ylim(-3, 14)
    ax.set_ylabel('Cost [JPY]')
    ax.set_xlabel('Explanatory variables')
    # Label spelling corrected from 'Digree'.
    ax.text(0.75, 0.85, s=f'Degree = {val}', size='x-large', transform=ax.transAxes)
Recommended Posts