# python
import numpy as np
import pandas as pd
# The formula below is transcribed from the SciPy documentation:
# https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.yeojohnson.html
def yeojohnson(lmbda, x):
    """Apply the Yeo-Johnson power transform to a single scalar value.

    Note the argument order: ``lmbda`` comes first and ``x`` second.

    Args:
        lmbda: Power parameter of the transform.
        x: Scalar value to transform (negative, zero or positive).

    Returns:
        The transformed value, chosen by the sign of ``x``:

        * ``x >= 0`` and ``lmbda != 0``: ``((x + 1)**lmbda - 1) / lmbda``
        * ``x >= 0`` and ``lmbda == 0``: ``log(x + 1)``
        * ``x < 0`` and ``lmbda != 2``:
          ``-((-x + 1)**(2 - lmbda) - 1) / (2 - lmbda)``
        * ``x < 0`` and ``lmbda == 2``: ``-log(-x + 1)``

    Raises:
        ValueError: If ``x`` is neither ``>= 0`` nor ``< 0`` (e.g. NaN).
    """
    if x >= 0:
        if lmbda == 0:
            # Limit of the power formula as lmbda -> 0.
            return np.log(x + 1)
        return ((x + 1)**lmbda - 1) / lmbda
    if x < 0:
        if lmbda == 2:
            # Limit of the power formula as lmbda -> 2.
            return -np.log(-x + 1)
        return -((-x + 1)**(2 - lmbda) - 1) / (2 - lmbda)
    # Reached when both comparisons above are false, which is what NaN does.
    # A bare ``raise`` here would fail with an unrelated "No active exception
    # to re-raise" RuntimeError, so report the real problem explicitly.
    raise ValueError(f"x must be an orderable number, got {x!r}")
# Values to transform: the integers from -10 to 10 inclusive.
x_ary = np.arange(-10, 11)

# NOTE(review): `plt` is used below but is not imported in the visible
# source; presumably `matplotlib.pyplot` imported elsewhere -- confirm.

# Draw one figure per lambda grid: ten values spaced 0.25 apart, then ten
# values spaced 0.5 apart.
for lmbda_step in (0.25, 0.5):
    lmbda_ary = np.arange(10) * lmbda_step

    # One column per lambda, one row per original x value.
    X_dic = {
        lmbda: [yeojohnson(lmbda, float(x)) for x in x_ary]
        for lmbda in lmbda_ary
    }
    df = pd.DataFrame(X_dic, index=x_ary)

    # Plot every lambda's curve on a single set of axes.
    df.plot(cmap='viridis', marker='.')
    plt.title('Change in value after conversion when lmbda is changed')
    plt.xlabel('Original value (x)')
    plt.ylabel('Converted value (y)')
    plt.show()
# output:
# Recommended Posts