import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt
import xarray as xr
Here's a slightly more advanced way to use xarray. For xarray, see Previous Post.
Rolling
It corresponds to pandas.rolling
.
Multidimensional data The same operation can be repeated for a small section along an axis.
In particular, it applies the operation while shifting the subsections one point at a time and returns data of the same size.
The operation of dividing the data into non-overlapping subsections and determining the representative value in that section is
This is binning
, which will be explained next.
da = xr.DataArray(np.sin(np.linspace(0,6,100)) + np.random.randn(100)*0.3,
dims={'time'}, coords={'time':np.linspace(0,6,100)})
da
<xarray.DataArray (time: 100)>
array([-0.217099, 0.594632, -0.213687, -0.088451, 0.643075, 0.672311,
-0.133941, 0.419469, 0.836265, 0.436087, 0.688019, 0.416498,
0.81197 , 0.567076, 0.536079, 1.135186, 0.202363, 0.643781,
0.427754, 0.998932, 0.411504, 1.270951, 0.726597, 0.802427,
1.354037, 1.265239, 0.782349, 0.78666 , 1.231765, 0.931645,
0.793739, 0.797864, 0.59434 , 0.830584, 0.888593, 0.835981,
0.662846, 0.372863, 0.629388, 0.875347, -0.206508, 0.374656,
0.864203, 0.673541, 0.611431, 0.610227, 0.398388, 0.182321,
0.238973, -0.300663, 0.202904, 0.36229 , -0.399834, 0.134846,
-0.46481 , 0.578797, -0.177458, 0.416176, 0.502337, -0.262874,
-0.531168, -0.476578, 0.049585, -0.648642, -0.557033, -0.537415,
-0.38051 , -0.600608, -0.709828, -0.767893, -1.211113, -1.175812,
-0.948098, -1.091834, -0.814726, -0.499395, -0.98674 , -0.651322,
-0.922065, -0.713906, -1.268239, -0.787697, -1.071392, -1.356153,
-1.481535, -1.178269, -0.891799, -0.985956, -1.200543, -0.680796,
-1.305116, -0.588287, -0.804333, -0.662258, -0.036607, -0.501065,
-0.259611, -0.695071, -0.312524, -0.277099])
Coordinates:
* time (time) float64 0.0 0.06061 0.1212 0.1818 0.2424 0.303 0.3636 ...
plt.plot(da['time'], da, 'o')
[<matplotlib.lines.Line2D at 0x7f7a6f754048>]
Specifies the target axis and moving average pair for the rolling object.
da_rolling = da.rolling(time=3).mean() #Take a moving average every 3 points in the direction along the time axis
da_rolling
<xarray.DataArray (time: 100)>
array([ nan, nan, 0.054615, 0.097498, 0.113646, 0.408978,
0.393815, 0.31928 , 0.373931, 0.56394 , 0.653457, 0.513534,
0.638829, 0.598515, 0.638375, 0.746113, 0.624543, 0.660443,
0.424633, 0.690156, 0.61273 , 0.893796, 0.803017, 0.933325,
0.96102 , 1.140568, 1.133875, 0.944749, 0.933591, 0.983356,
0.985716, 0.841082, 0.728648, 0.740929, 0.771172, 0.851719,
0.795807, 0.623897, 0.555032, 0.625866, 0.432742, 0.347832,
0.344117, 0.637467, 0.716392, 0.631733, 0.540015, 0.396979,
0.273227, 0.04021 , 0.047071, 0.088177, 0.05512 , 0.032434,
-0.243266, 0.082945, -0.021157, 0.272505, 0.247018, 0.218546,
-0.097235, -0.42354 , -0.319387, -0.358545, -0.385363, -0.58103 ,
-0.491653, -0.506178, -0.563649, -0.692776, -0.896278, -1.051606,
-1.111675, -1.071915, -0.951553, -0.801985, -0.766954, -0.712486,
-0.853376, -0.762431, -0.96807 , -0.92328 , -1.042442, -1.071747,
-1.303026, -1.338652, -1.183868, -1.018675, -1.0261 , -0.955765,
-1.062152, -0.858066, -0.899245, -0.684959, -0.501066, -0.399977,
-0.265761, -0.485249, -0.422402, -0.428231])
Coordinates:
* time (time) float64 0.0 0.06061 0.1212 0.1818 0.2424 0.303 0.3636 ...
plt.plot(da['time'], da, 'o', alpha=0.3)
plt.plot(da['time'], da.rolling(time=5).mean(), '-', label='window: 5')
plt.plot(da['time'], da.rolling(time=10).mean(), '--', label='window: 10')
plt.legend()
<matplotlib.legend.Legend at 0x7f7a7180c908>
Like pandas
, the rolling
method returns the Rolling
class.
da.rolling(time=3)
DataArrayRolling [window->3,center->False,dim->time]
The argument to create a rolling
object is
rolling
for a single axis is supported.)Others,
nan
.plt.plot(da['time'], da, 'o', alpha=0.3)
plt.plot(da['time'], da.rolling(time=10).mean(), '--', label='not centered')
plt.plot(da['time'], da.rolling(center=True, time=10).mean(), '-', label='centered')
plt.legend()
<matplotlib.legend.Legend at 0x7f7a6f69fb70>
Corresponds to numpy
methods such as'argmax','argmin','max','min','mean','prod','sum','std','var',' median'
I am.
further. There is also a reduce (func, ** kwargs)
method.
For example
da_35percent = da.rolling(time=20, center=True).reduce(np.percentile, q=35)
da_65percent = da.rolling(time=20, center=True).reduce(np.percentile, q=65)
plt.plot(da['time'], da, 'o', alpha=0.3)
plt.plot(da['time'], da_35percent, '--', label='35% in 20 points')
plt.plot(da['time'], da_65percent, '--', label='65% in 20 points')
plt.legend()
<matplotlib.legend.Legend at 0x7f7a6f7a5a20>
You can pass any function object like this.
However, the function object to be passed is like the numpy
method above.
It must correspond to ʻaxis`, which specifies which axis to reduce.
A simple application is convolution.
Where $ f_i $ is the convolved data $ g_n $ is the convolution data of size $ N $.
In the following, we will try to convolve a wavelet-like function.
# functor to apply convolution
def conv(src, obj, axis=0):
"""
+ src: target data to be convoluted
+ obj: convoluting data
+ axis: which axis of src to convolute
"""
if src.shape[axis] == obj.shape[0]:
return np.sum(src*obj, axis=axis)
else:
return 0.0
# convoluting data
n_window = 20
conv_cos = np.cos(2.0*np.pi * 2.0 * np.arange(n_window) / n_window) * np.sin(np.pi * np.arange(n_window) / n_window)
conv_sin = np.sin(2.0*np.pi * 2.0 * np.arange(n_window) / n_window) * np.sin(np.pi * np.arange(n_window) / n_window)
plt.plot(conv_cos, label='conv_cos')
plt.plot(conv_sin, label='conv_sin')
plt.legend(loc='best')
<matplotlib.legend.Legend at 0x7f7a6f585b00>
da_cos = da.rolling(time=20, center=True).reduce(conv, obj=conv_cos)
da_sin = da.rolling(time=20, center=True).reduce(conv, obj=conv_sin)
plt.plot(da['time'], da, 'o', alpha=0.3)
plt.plot(da['time'], da_cos, '--', label='conv_cos')
plt.plot(da['time'], da_sin, '--', label='conv_sin')
plt.legend()
<matplotlib.legend.Legend at 0x7f7a6f5c1978>
Dataset.rolling
Xr.Dataset
, which is a collection of multiple xr.DataArray
s, also supports rolling
.
ds = xr.Dataset({'sin': ('time', np.sin(np.linspace(0,6,100)) + np.random.randn(100)*0.3),
'cos': ('time', np.cos(np.linspace(0,6,100)) + np.random.randn(100)*0.3)},
coords={'time':np.linspace(0,6,100)})
ds
<xarray.Dataset>
Dimensions: (time: 100)
Coordinates:
* time (time) float64 0.0 0.06061 0.1212 0.1818 0.2424 0.303 0.3636 ...
Data variables:
sin (time) float64 0.5826 -0.04678 -0.1069 -0.05271 0.08796 0.1871 ...
cos (time) float64 0.7281 0.9172 1.015 0.9138 0.4236 0.8481 0.8641 ...
plt.plot(ds['sin'], 'o', label='sin')
plt.plot(ds['cos'], 'o', label='cos')
plt.legend()
<matplotlib.legend.Legend at 0x7f7a6f4a6518>
ds_mean = ds.rolling(time=10).mean()
plt.plot(ds['sin'], 'bo', label='sin', alpha=0.3)
plt.plot(ds_mean['sin'], '-b', label='sin_rolling')
plt.plot(ds['cos'], 'go', label='cos', alpha=0.3)
plt.plot(ds_mean['cos'], '-g', label='cos_rolling')
plt.legend()
<matplotlib.legend.Legend at 0x7f7a6f4b9080>
binning
A feature similar to rolling
is binning
.
This can be achieved with the group_bins
operation.
It corresponds to pandas.groupby_bins
.
Multidimensional data The same operation can be repeated for non-overlapping subsections along an axis.
To divide the above da
into 10 equal parts in the time direction and find the average value among them,
Pass the axis label in the group
argument and an equal number in the bins
argument.
Finally, by executing the mean
method, a new xr.DataArray
is created.
da_bin_mean = da.groupby_bins(group='time', bins=10).mean()
da_bin_mean
<xarray.DataArray (time_bins: 10)>
array([ 0.294866, 0.642766, 0.956317, 0.728154, 0.344657, 0.089237,
-0.516009, -0.901501, -1.090238, -0.544197])
Coordinates:
* time_bins (time_bins) object '(-0.006, 0.6]' '(0.6, 1.2]' '(1.2, 1.8]' ...
The newly created DataArray
contains points whose contents correspond to the mean values and whose axes correspond to the values at both ends of the small interval.
In actual use, you may want to operate on a specified interval.
In that case, give the interval break point to the bins
argument. further,
bins = np.linspace(0,6,11) #When dividing into 10 pieces, pass 11 arrays that are the break points of the small section including both ends.
bin_labels = bins[:-1] + 0.3 #Prepare axes corresponding to 10 points.
da_bin_mean = da.groupby_bins(group='time', bins=bins, labels=bin_labels).mean()
da_bin_mean
<xarray.DataArray (time_bins: 10)>
array([ 0.351751, 0.642766, 0.956317, 0.728154, 0.344657, 0.089237,
-0.516009, -0.901501, -1.090238, -0.544197])
Coordinates:
* time_bins (time_bins) float64 0.3 0.9 1.5 2.1 2.7 3.3 3.9 4.5 5.1 5.7
plt.plot(da['time'], da, 'o', label='original', alpha=0.3)
plt.plot(da_bin_mean['time_bins'], da_bin_mean, '-o', label='binned')
plt.legend()
<matplotlib.legend.Legend at 0x7f7a6f5a9748>
Recommended Posts