Skip to content

Commit 908ba65

Browse files
committed
Fixed #206: Add NaN/inf support to FLOSS
1 parent e0a4d22 commit 908ba65

File tree

2 files changed

+98
-14
lines changed

2 files changed

+98
-14
lines changed

stumpy/floss.py

Lines changed: 21 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -429,6 +429,10 @@ def __init__(
429429
self._n = self._T.shape[0]
430430
self._last_idx = self._n - self._m + 1 # Depends on the changing length of `T`
431431
self._n_appended = 0
432+
self._T_isfinite = np.isfinite(self._T)
433+
self._finite_T = self._T.copy()
434+
self._finite_T[~np.isfinite(self._finite_T)] = 0.0
435+
self._finite_Q = self._finite_T[-self._m :].copy()
432436

433437
if self._custom_iac is None: # pragma: no cover
434438
self._custom_iac = _iac(
@@ -482,8 +486,15 @@ def update(self, t):
482486
Segmentation (FLOSS).
483487
"""
484488
self._T[:-1] = self._T[1:]
489+
self._T_isfinite[:-1] = self._T_isfinite[1:]
490+
self._finite_T[:-1] = self._finite_T[1:]
491+
self._finite_Q[:-1] = self._finite_Q[1:]
485492
self._T[-1] = t
486-
Q = self._T[-self._m :]
493+
self._T_isfinite[-1] = np.isfinite(t)
494+
self._finite_T[-1] = t
495+
if not np.isfinite(t):
496+
self._finite_T[-1] = 0.0
497+
self._finite_Q[-1] = self._finite_T[-1]
487498
excl_zone = int(np.ceil(self._m / 4))
488499
# Note that the start of the exclusion zone is relative to
489500
# the unchanging length of the matrix profile index
@@ -499,9 +510,16 @@ def update(self, t):
499510
# Ingress
500511
M_T, Σ_T = core.compute_mean_std(self._T, self._m)
501512

502-
D = core.mass(Q, self._T, M_T, Σ_T)
513+
D = core.mass(self._finite_Q, self._finite_T, M_T, Σ_T)
503514
D[zone_start:] = np.inf
504515

516+
T_subseq_isfinite = np.all(
517+
core.rolling_window(self._T_isfinite, self._m), axis=1
518+
)
519+
D[~T_subseq_isfinite] = np.inf
520+
if not T_subseq_isfinite[-1]:
521+
D[:] = np.inf
522+
505523
# Update nearest neighbor for old data if any old subsequences
506524
# are closer to the newly arrived subsequence
507525
update_idx = np.argwhere(D < self._mp[:, 0]).flatten()
@@ -538,7 +556,7 @@ def I_(self):
538556
"""
539557
Get the updated (right) matrix profile indices
540558
"""
541-
return self._mp[:, 3].astype(np.float)
559+
return self._mp[:, 3].astype(np.int)
542560

543561
@property
544562
def T_(self):

tests/test_floss.py

Lines changed: 77 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -59,13 +59,6 @@ def naive_right_mp(data, m):
5959
return mp
6060

6161

62-
def naive_distance_profile(Q, T, m):
63-
D = np.linalg.norm(
64-
core.z_norm(core.rolling_window(T, m), 1) - core.z_norm(Q), axis=1
65-
)
66-
return D
67-
68-
6962
def naive_rea(cac, n_regimes, L, excl_factor):
7063
cac_list = cac.tolist()
7164
loc_regimes = [None] * (n_regimes - 1)
@@ -81,6 +74,9 @@ def naive_rea(cac, n_regimes, L, excl_factor):
8174

8275
test_data = [(np.random.randint(0, 50, size=50, dtype=np.int))]
8376

77+
substitution_locations = [(slice(0, 0), 0, -1, slice(1, 3), [0, 3])]
78+
substitution_values = [np.nan, np.inf]
79+
8480

8581
@pytest.mark.parametrize("I", test_data)
8682
def test_nnmark(I):
@@ -138,9 +134,8 @@ def test_fluss(I):
138134
def test_floss():
139135
data = np.random.uniform(-1000, 1000, [64])
140136
m = 5
141-
old_data = data[:30]
142-
n = old_data.shape[0]
143-
add_data = data[30:]
137+
n = 30
138+
old_data = data[:n]
144139

145140
mp = naive_right_mp(old_data, m)
146141
comp_mp = stump(old_data, m)
@@ -161,7 +156,7 @@ def test_floss():
161156
mp[-1, 0] = np.inf
162157
mp[-1, 3] = last_idx + i
163158

164-
D = naive_distance_profile(ref_T[-m:], ref_T, m)
159+
D = naive.distance_profile(ref_T[-m:], ref_T, m)
165160
D[zone_start:] = np.inf
166161

167162
update_idx = np.argwhere(D < mp[:, 0]).flatten()
@@ -193,3 +188,74 @@ def test_floss():
193188
npt.assert_almost_equal(ref_P, comp_P)
194189
npt.assert_almost_equal(ref_I, comp_I)
195190
npt.assert_almost_equal(ref_T, comp_T)
191+
192+
193+
@pytest.mark.parametrize("substitute", substitution_values)
194+
@pytest.mark.parametrize("substitution_locations", substitution_locations)
195+
def test_floss_inf_nan(substitute, substitution_locations):
196+
T = np.random.uniform(-1000, 1000, [64])
197+
m = 5
198+
n = 30
199+
data = T.copy()
200+
for substitution_location in substitution_locations:
201+
data[:] = T[:]
202+
data[substitution_location] = substitute
203+
old_data = data[:n]
204+
205+
mp = naive_right_mp(old_data, m)
206+
comp_mp = stump(old_data, m)
207+
k = mp.shape[0]
208+
209+
rolling_Ts = core.rolling_window(data[1:], n)
210+
L = 5
211+
excl_factor = 1
212+
custom_iac = _iac(k, bidirectional=False)
213+
stream = floss(comp_mp, old_data, m, L, excl_factor, custom_iac=custom_iac)
214+
last_idx = n - m + 1
215+
excl_zone = int(np.ceil(m / 4))
216+
zone_start = max(0, k - excl_zone)
217+
for i, ref_T in enumerate(rolling_Ts):
218+
mp[:, 1] = -1
219+
mp[:, 2] = -1
220+
mp[:] = np.roll(mp, -1, axis=0)
221+
mp[-1, 0] = np.inf
222+
mp[-1, 3] = last_idx + i
223+
224+
D = naive.distance_profile(ref_T[-m:], ref_T, m)
225+
D[zone_start:] = np.inf
226+
227+
ref_T_isfinite = np.isfinite(ref_T)
228+
ref_T_subseq_isfinite = np.all(
229+
core.rolling_window(ref_T_isfinite, m), axis=1
230+
)
231+
232+
D[~ref_T_subseq_isfinite] = np.inf
233+
update_idx = np.argwhere(D < mp[:, 0]).flatten()
234+
mp[update_idx, 0] = D[update_idx]
235+
mp[update_idx, 3] = last_idx + i
236+
237+
ref_cac_1d = _cac(
238+
mp[:, 3] - i - 1,
239+
L,
240+
bidirectional=False,
241+
excl_factor=excl_factor,
242+
custom_iac=custom_iac,
243+
)
244+
245+
ref_mp = mp.copy()
246+
ref_P = ref_mp[:, 0]
247+
ref_I = ref_mp[:, 3]
248+
249+
stream.update(ref_T[-1])
250+
comp_cac_1d = stream.cac_1d_
251+
comp_P = stream.P_
252+
comp_I = stream.I_
253+
comp_T = stream.T_
254+
255+
naive.replace_inf(ref_P)
256+
naive.replace_inf(comp_P)
257+
258+
npt.assert_almost_equal(ref_cac_1d, comp_cac_1d)
259+
npt.assert_almost_equal(ref_P, comp_P)
260+
npt.assert_almost_equal(ref_I, comp_I)
261+
npt.assert_almost_equal(ref_T, comp_T)

0 commit comments

Comments
 (0)