Les cartes des coefficients de corrélation sont souvent dessinées dans l'analyse des données atmosphériques et océaniques. En Python, Numpy et Scipy ont des fonctions pour calculer le coefficient de corrélation, vous pouvez donc dessiner une carte du coefficient de corrélation en les utilisant. Cependant, il existe plusieurs façons de le faire, nous allons donc comparer ici leurs performances.
def crr_loop_scipy(x1, x2):
ret = numpy.empty([x1.shape[1], x2.shape[1]])
for jj in xrange(ret.shape[1]):
for ii in xrange(ret.shape[0]):
ret[ii,jj], _ = scipy.stats.pearsonr(x1[:,ii], x2[:,jj])
return ret
def crr_loop_numpy(x1, x2):
ret = numpy.empty([x1.shape[1], x2.shape[1]])
for jj in xrange(ret.shape[1]):
for ii in xrange(ret.shape[0]):
ret[ii,jj] = numpy.corrcoef(x1[:,ii], x2[:,jj])[1,0]
return ret
def crr_numpy(x1, x2):
return numpy.corrcoef(x1, x2, rowvar=False)[0:x1.shape[1], x1.shape[1]:]
def crr_original(x1, x2):
crscov = np.dot(x1.T, x2) / x1.shape[0]
std1 = np.std(x1, axis=0).reshape(x1.shape[1], 1)
std2 = np.std(x2, axis=0).reshape(x2.shape[1], 1)
crsstd = np.dot(std1, std2.T)
return crscov / crsstd
def generate_data(nx):
nt = 1000
data1 = numpy.random.randn(nt, nx)
data2 = numpy.random.randn(nt, nx)
return data1, data2
python
import time
def measure_time(f, *args):
start = time.time()
f(*args)
return time.time() - start
def measure_mean_time(f, *args):
n = 2
return numpy.mean([measure_time(f, *args) for i in xrange(n)])
Le résultat est,
Était extrêmement rapide. De plus, comme scipy est plus rapide que numpy, il peut être préférable d'utiliser scipy même si le but n'est pas la carte des coefficients de corrélation comme cette fois.
import numpy as np
from scipy import stats
import time
def crr_loop_scipy(x1, x2):
ret = np.empty([x1.shape[1], x2.shape[1]])
for jj in xrange(ret.shape[1]):
for ii in xrange(ret.shape[0]):
ret[ii,jj], _ = stats.pearsonr(x1[:,ii], x2[:,jj])
return ret
def crr_loop_numpy(x1, x2):
ret = np.empty([x1.shape[1], x2.shape[1]])
for jj in xrange(ret.shape[1]):
for ii in xrange(ret.shape[0]):
ret[ii,jj] = np.corrcoef(x1[:,ii], x2[:,jj])[1,0]
return ret
def crr_numpy(x1, x2):
return np.corrcoef(x1, x2, rowvar=False)[0:x1.shape[1], x1.shape[1]:]
def crr_original(x1, x2):
crscov = np.dot(x1.T, x2) / x1.shape[0]
std1 = np.std(x1, axis=0).reshape(x1.shape[1], 1)
std2 = np.std(x2, axis=0).reshape(x2.shape[1], 1)
crsstd = np.dot(std1, std2.T)
return crscov / crsstd
def generate_data(nx):
nt = 1000
data1 = np.random.randn(nt, nx)
data2 = np.random.randn(nt, nx)
return data1, data2
def measure_time(f, *args):
start = time.time()
f(*args)
return time.time() - start
def measure_mean_time(f, *args):
n = 2
return np.mean([measure_time(f, *args) for i in xrange(n)])
def experiment(nxs, crrs):
mean_times = np.empty([len(crrs), len(nxs)])
for icrr, crr in enumerate(crrs):
for ix, nx in enumerate(nxs):
x1, x2 = generate_data(nx)
mean_times[icrr, ix] = measure_mean_time(crr, x1, x2)
return mean_times
if __name__ == '__main__:
import matplotlib.pyplot as plt
nxs1 = range(200, 200 + 1000, 200)
nxs2 = range(200, 200 + 10000, 600)
mean_times1 = experiment(nxs1, [crr_loop_scipy, crr_loop_numpy])
mean_times2 = experiment(nxs2, [crr_numpy, crr_original])
lebels1 = ['loop_sci', 'loop_np']
labels2 = ['np', 'original']
for label1, label2, mean_time1, mean_time2 in zip(lebels1, labels2, mean_times1, mean_times2):
plt.plot(nxs1, mean_time1, label=label1)
plt.plot(nxs2, mean_time2, label=label2)
plt.xlabel('nx')
plt.ylabel('time (sec)')
plt.title('nt=1000')
plt.xlim([0, nxs2[-1]])
plt.ylim([0, 80])
plt.legend()
plt.show()
Recommended Posts