Lors de l'analyse de séries chronologiques, il est parfois nécessaire d'isoler une portion de la série et d'en extraire des caractéristiques. Le code ci-dessous montre les caractéristiques qui peuvent être extraites.
from scipy import fftpack, signal
import scipy
import numpy as np
import matplotlib.pyplot as plt
def pentropy(y, fs):
    """Return the spectral entropy of a signal.

    The power spectral density is estimated with ``signal.periodogram``,
    normalised so that it sums to one, and the Shannon entropy of that
    distribution is computed with ``scipy.stats.entropy``.

    Parameters
    ----------
    y : array_like
        Signal samples.
    fs : float
        Sampling frequency, forwarded to ``signal.periodogram``.

    Returns
    -------
    float
        Spectral entropy of ``y``. ``nan`` is returned when the periodogram
        carries no power at all (e.g. an all-zero signal, or a constant one
        once the periodogram's default detrending has removed its mean),
        because no probability distribution can be formed in that case.
    """
    # Imported explicitly so the function does not rely on ``scipy.stats``
    # having been loaded as a side effect of some unrelated import.
    from scipy import stats

    _, pxx = signal.periodogram(y, fs)
    total_power = np.sum(pxx)
    # ``not (> 0)`` also catches a NaN total, which propagates as NaN.
    if not total_power > 0:
        return np.nan
    return stats.entropy(pxx / total_power)
def generate_features(y, x, fs=1, order=5):
    """Build a feature vector describing a (partial) time series.

    Parameters
    ----------
    y : np.ndarray
        Waveform samples; treated as a one-dimensional series.
    x : np.ndarray
        Sample positions matching ``y`` (time, etc.).
    fs : int
        Sampling frequency, passed to ``pentropy`` for the spectral entropy.
    order : int
        Peak detection interval: passed as ``order`` to
        ``signal.argrelmax`` / ``signal.argrelmin``.

    Returns
    -------
    np.ndarray
        Vector of 27 values, in this order:

        0. L1 norm of ``y``
        1. L2 norm of ``y``
        2. mean
        3. standard deviation
        4. skewness
        5. kurtosis
        6. sum of absolute deviations from the mean
        7. value range (max - min)
        8. asymmetry, ``(max - mean) + (min - mean)``
        9. highest local maximum
        10. lowest local minimum
        11. difference between 9 and 10
        12. mean ``x`` position of all local extrema
        13. number of local extrema
        14. largest interval between consecutive extrema
        15. smallest interval between consecutive extrema
        16. difference between 14 and 15
        17. mean interval between consecutive extrema
        18. median interval between consecutive extrema
        19. spectral entropy
        20-26. the 0/1/25/50/75/99/100th percentiles minus the mean

        Extremum-based entries that cannot be computed (no local maximum or
        minimum found, or fewer than two extrema for the interval
        statistics) are ``nan`` instead of raising.
    """
    # Imported explicitly so the function does not rely on ``scipy.stats``
    # having been loaded as a side effect of some unrelated import.
    from scipy import stats

    y = np.asarray(y)
    x = np.asarray(x)

    # Norms
    l1_norm = np.linalg.norm(y, ord=1)
    l2_norm = np.linalg.norm(y, ord=2)

    # Moments
    mean = np.mean(y)
    std = np.std(y)
    skew = stats.skew(y)
    kurtosis = stats.kurtosis(y)

    # L1 norm of the centred signal, i.e. the *sum* of absolute deviations.
    # NOTE(review): the name "mad" suggests a mean absolute deviation, but
    # the value is not divided by the sample count; kept as-is so existing
    # feature values do not change.
    mad = np.linalg.norm(y - mean, ord=1)

    # Percentiles, absolute and relative to the mean
    percentiles = np.percentile(y, [0, 1, 25, 50, 75, 99, 100])
    relative_percentile = percentiles - mean
    max_range = percentiles[-1] - percentiles[0]
    asymmetry = relative_percentile[-1] + relative_percentile[0]

    # Indices of the local maxima / minima
    max_idx = signal.argrelmax(y, order=order)[0]
    min_idx = signal.argrelmin(y, order=order)[0]

    # Peak heights; NaN when the series has no such extremum.
    max_height = np.max(y[max_idx]) if max_idx.size else np.nan
    min_height = np.min(y[min_idx]) if min_idx.size else np.nan
    peak_width = max_height - min_height

    # Positions of all extrema in series order. Sorting matters: simply
    # appending the minima after the maxima would make np.diff produce a
    # spurious jump from the last maximum back to the first minimum and
    # would never measure the gap between neighbouring extrema.
    extrema_idx = np.sort(np.concatenate([max_idx, min_idx]))
    x_p = x[extrema_idx]
    num_peak = x_p.size
    mean_width_all = np.mean(x_p) if num_peak else np.nan

    # Statistics of the intervals between consecutive extrema
    intervals = np.diff(x_p)
    if intervals.size:
        width_max = np.max(intervals)
        width_min = np.min(intervals)
        width_diff = width_max - width_min
        width_mean = np.mean(intervals)
        width_median = np.median(intervals)
    else:
        width_max = width_min = width_diff = np.nan
        width_mean = width_median = np.nan

    # Spectral entropy
    se = pentropy(y, fs)

    # Merge: 20 scalar features followed by the 7 relative percentiles.
    scalars = np.asarray([
        l1_norm, l2_norm, mean, std, skew, kurtosis, mad, max_range,
        asymmetry, max_height, min_height, peak_width,
        mean_width_all, num_peak, width_max, width_min, width_diff,
        width_mean, width_median, se,
    ])
    return np.concatenate([scalars, relative_percentile])
# Demo: extract the feature vector of a noisy sine wave.
# 100 evenly spaced sample positions on [0, 10].
x = np.linspace(0, 10, num=100)
# Clean reference signal.
yorg = np.sin(x)
# Add standard-normal noise scaled by 0.5.
y = yorg + 0.5 * np.random.randn(len(x))
features = generate_features(y, x, order=5)