Fixed #206: Add NaN/inf support to FLOSS

seanlaw · seanlaw · commit 908ba6513bf5 · 2020-10-18T22:18:43.000-04:00
diff --git a/stumpy/floss.py b/stumpy/floss.py
@@ -429,6 +429,10 @@ def __init__(
         self._n = self._T.shape[0]
         self._last_idx = self._n - self._m + 1  # Depends on the changing length of `T`
         self._n_appended = 0
+        self._T_isfinite = np.isfinite(self._T)
+        self._finite_T = self._T.copy()
+        self._finite_T[~np.isfinite(self._finite_T)] = 0.0
+        self._finite_Q = self._finite_T[-self._m :].copy()
 
         if self._custom_iac is None:  # pragma: no cover
             self._custom_iac = _iac(
@@ -482,8 +486,15 @@ def update(self, t):
         Segmentation (FLOSS).
         """
         self._T[:-1] = self._T[1:]
+        self._T_isfinite[:-1] = self._T_isfinite[1:]
+        self._finite_T[:-1] = self._finite_T[1:]
+        self._finite_Q[:-1] = self._finite_Q[1:]
         self._T[-1] = t
-        Q = self._T[-self._m :]
+        self._T_isfinite[-1] = np.isfinite(t)
+        self._finite_T[-1] = t
+        if not np.isfinite(t):
+            self._finite_T[-1] = 0.0
+        self._finite_Q[-1] = self._finite_T[-1]
         excl_zone = int(np.ceil(self._m / 4))
         # Note that the start of the exclusion zone is relative to
         # the unchanging length of the matrix profile index
@@ -499,9 +510,16 @@ def update(self, t):
         # Ingress
         M_T, Σ_T = core.compute_mean_std(self._T, self._m)
 
-        D = core.mass(Q, self._T, M_T, Σ_T)
+        D = core.mass(self._finite_Q, self._finite_T, M_T, Σ_T)
         D[zone_start:] = np.inf
 
+        T_subseq_isfinite = np.all(
+            core.rolling_window(self._T_isfinite, self._m), axis=1
+        )
+        D[~T_subseq_isfinite] = np.inf
+        if not T_subseq_isfinite[-1]:
+            D[:] = np.inf
+
         # Update nearest neighbor for old data if any old subsequences
         # are closer to the newly arrived subsequence
         update_idx = np.argwhere(D < self._mp[:, 0]).flatten()
@@ -538,7 +556,7 @@ def I_(self):
         """
         Get the updated (right) matrix profile indices
         """
-        return self._mp[:, 3].astype(np.float)
+        return self._mp[:, 3].astype(np.int)
 
     @property
     def T_(self):
diff --git a/tests/test_floss.py b/tests/test_floss.py
@@ -59,13 +59,6 @@ def naive_right_mp(data, m):
     return mp
 
 
-def naive_distance_profile(Q, T, m):
-    D = np.linalg.norm(
-        core.z_norm(core.rolling_window(T, m), 1) - core.z_norm(Q), axis=1
-    )
-    return D
-
-
 def naive_rea(cac, n_regimes, L, excl_factor):
     cac_list = cac.tolist()
     loc_regimes = [None] * (n_regimes - 1)
@@ -81,6 +74,9 @@ def naive_rea(cac, n_regimes, L, excl_factor):
 
 test_data = [(np.random.randint(0, 50, size=50, dtype=np.int))]
 
+substitution_locations = [(slice(0, 0), 0, -1, slice(1, 3), [0, 3])]  # index expressions used as data[loc]; slice(0, 0) selects nothing
+substitution_values = [np.nan, np.inf]  # non-finite values written at each location
+
 
 @pytest.mark.parametrize("I", test_data)
 def test_nnmark(I):
@@ -138,9 +134,8 @@ def test_fluss(I):
 def test_floss():
     data = np.random.uniform(-1000, 1000, [64])
     m = 5
-    old_data = data[:30]
-    n = old_data.shape[0]
-    add_data = data[30:]
+    n = 30
+    old_data = data[:n]
 
     mp = naive_right_mp(old_data, m)
     comp_mp = stump(old_data, m)
@@ -161,7 +156,7 @@ def test_floss():
         mp[-1, 0] = np.inf
         mp[-1, 3] = last_idx + i
 
-        D = naive_distance_profile(ref_T[-m:], ref_T, m)
+        D = naive.distance_profile(ref_T[-m:], ref_T, m)
         D[zone_start:] = np.inf
 
         update_idx = np.argwhere(D < mp[:, 0]).flatten()
@@ -193,3 +188,74 @@ def test_floss():
         npt.assert_almost_equal(ref_P, comp_P)
         npt.assert_almost_equal(ref_I, comp_I)
         npt.assert_almost_equal(ref_T, comp_T)
+
+
+@pytest.mark.parametrize("substitute", substitution_values)
+@pytest.mark.parametrize("substitution_locations", substitution_locations)
+def test_floss_inf_nan(substitute, substitution_locations):  # streaming floss vs. a hand-rolled reference when the data holds `substitute`
+    T = np.random.uniform(-1000, 1000, [64])  # 64 random finite values (unseeded)
+    m = 5
+    n = 30
+    data = T.copy()  # scratch buffer; T itself is only read
+    for substitution_location in substitution_locations:
+        data[:] = T[:]  # start every location from the clean series
+        data[substitution_location] = substitute  # inject the nan/inf value(s)
+        old_data = data[:n]  # first n points seed both matrix profiles and the stream
+
+        mp = naive_right_mp(old_data, m)  # reference profile, maintained by hand below
+        comp_mp = stump(old_data, m)  # profile handed to floss
+        k = mp.shape[0]  # row count of the reference profile
+
+        rolling_Ts = core.rolling_window(data[1:], n)  # each row is compared against stream.T_ after one update
+        L = 5
+        excl_factor = 1
+        custom_iac = _iac(k, bidirectional=False)
+        stream = floss(comp_mp, old_data, m, L, excl_factor, custom_iac=custom_iac)
+        last_idx = n - m + 1  # base of the index stored for rows touched in iteration i
+        excl_zone = int(np.ceil(m / 4))
+        zone_start = max(0, k - excl_zone)  # D rows from here on are forced to inf
+        for i, ref_T in enumerate(rolling_Ts):
+            mp[:, 1] = -1  # columns 1 and 2 are never asserted on below
+            mp[:, 2] = -1
+            mp[:] = np.roll(mp, -1, axis=0)  # shift rows up; the wrapped last row is reset next
+            mp[-1, 0] = np.inf
+            mp[-1, 3] = last_idx + i
+
+            D = naive.distance_profile(ref_T[-m:], ref_T, m)  # reference distances for the newest m points
+            D[zone_start:] = np.inf
+
+            ref_T_isfinite = np.isfinite(ref_T)
+            ref_T_subseq_isfinite = np.all(  # True where every point of a length-m window is finite
+                core.rolling_window(ref_T_isfinite, m), axis=1
+            )
+
+            D[~ref_T_subseq_isfinite] = np.inf  # windows touching nan/inf can never be selected
+            update_idx = np.argwhere(D < mp[:, 0]).flatten()  # rows where the new distance beats the stored one
+            mp[update_idx, 0] = D[update_idx]
+            mp[update_idx, 3] = last_idx + i
+
+            ref_cac_1d = _cac(
+                mp[:, 3] - i - 1,  # presumably re-bases indices by the i + 1 points streamed so far -- confirm
+                L,
+                bidirectional=False,
+                excl_factor=excl_factor,
+                custom_iac=custom_iac,
+            )
+
+            ref_mp = mp.copy()  # snapshot so later in-place edits do not touch `mp`
+            ref_P = ref_mp[:, 0]
+            ref_I = ref_mp[:, 3]
+
+            stream.update(ref_T[-1])  # feed the newest point to the streaming object
+            comp_cac_1d = stream.cac_1d_
+            comp_P = stream.P_
+            comp_I = stream.I_
+            comp_T = stream.T_
+
+            naive.replace_inf(ref_P)  # presumably rewrites inf in place so both profiles compare equal -- confirm
+            naive.replace_inf(comp_P)
+
+            npt.assert_almost_equal(ref_cac_1d, comp_cac_1d)
+            npt.assert_almost_equal(ref_P, comp_P)
+            npt.assert_almost_equal(ref_I, comp_I)
+            npt.assert_almost_equal(ref_T, comp_T)