Skip to content

Commit d18d8c9

Browse files
committed
Fixed #308 Transpose mstump/mstumped output
1 parent 6b037f1 commit d18d8c9

File tree

5 files changed

+142
-156
lines changed

5 files changed

+142
-156
lines changed

docs/Tutorial_Multidimensional_Motif_Discovery.ipynb

Lines changed: 111 additions & 125 deletions
Large diffs are not rendered by default.

stumpy/mstump.py

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -192,9 +192,9 @@ def _discretize(a, bins, right=True):
192192
return np.digitize(a, bins, right=right)
193193

194194

195-
def _get_subspace(T, m, motif_idx, nn_idx, k, include=None, discords=False):
195+
def _get_subspace(T, m, subseq_idx, nn_idx, k, include=None, discords=False):
196196
"""
197-
Compute the multi-dimensional matrix profile subspace for a given motif index and
197+
Compute the multi-dimensional matrix profile subspace for a given subsequence index and
198198
its nearest neighbor index
199199
200200
Parameters
@@ -206,8 +206,8 @@ def _get_subspace(T, m, motif_idx, nn_idx, k, include=None, discords=False):
206206
m : int
207207
Window size
208208
209-
motif_idx : int
210-
The motif index in T
209+
subseq_idx : int
210+
The subsequence index in T
211211
212212
nn_idx : int
213213
The nearest neighbor index in T
@@ -236,9 +236,9 @@ def _get_subspace(T, m, motif_idx, nn_idx, k, include=None, discords=False):
236236
"""
237237
T, _, _ = core.preprocess(T, m)
238238

239-
motifs = core.z_norm(T[:, motif_idx : motif_idx + m], axis=1)
239+
subseqs = core.z_norm(T[:, subseq_idx : subseq_idx + m], axis=1)
240240
neighbors = core.z_norm(T[:, nn_idx : nn_idx + m], axis=1)
241-
D = np.linalg.norm(motifs - neighbors, axis=1)
241+
D = np.linalg.norm(subseqs - neighbors, axis=1)
242242

243243
if discords:
244244
sorted_idx = D[::-1].argsort(axis=0, kind="mergesort")
@@ -260,9 +260,9 @@ def _get_subspace(T, m, motif_idx, nn_idx, k, include=None, discords=False):
260260

261261
n_bit = 8
262262
bins = _inverse_norm()
263-
disc_motifs = _discretize(motifs[S], bins)
263+
disc_subseqs = _discretize(subseqs[S], bins)
264264
disc_neighbors = _discretize(neighbors[S], bins)
265-
n_val = np.unique(disc_motifs - disc_neighbors).shape[0]
265+
n_val = np.unique(disc_subseqs - disc_neighbors).shape[0]
266266
bit_size = n_bit * (T.shape[0] * m * 2 - k * m)
267267
bit_size = bit_size + k * m * np.log2(n_val) + n_val * n_bit
268268

@@ -811,12 +811,12 @@ def mstump(T, m, include=None, discords=False):
811811
Returns
812812
-------
813813
P : ndarray
814-
The multi-dimensional matrix profile. Each column of the array corresponds
815-
to each matrix profile for a given dimension (i.e., the first column is
816-
the 1-D matrix profile and the second column is the 2-D matrix profile).
814+
The multi-dimensional matrix profile. Each row of the array corresponds
815+
to each matrix profile for a given dimension (i.e., the first row is
816+
the 1-D matrix profile and the second row is the 2-D matrix profile).
817817
818818
I : ndarray
819-
The multi-dimensional matrix profile index where each column of the array
819+
The multi-dimensional matrix profile index where each row of the array
820820
corresponds to each matrix profile index for a given dimension.
821821
822822
Notes
@@ -874,4 +874,4 @@ def mstump(T, m, include=None, discords=False):
874874
discords,
875875
)
876876

877-
return P.T, I.T
877+
return P, I

stumpy/mstumped.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -55,12 +55,12 @@ def mstumped(dask_client, T, m, include=None, discords=False):
5555
Returns
5656
-------
5757
P : ndarray
58-
The multi-dimensional matrix profile. Each column of the array corresponds
59-
to each matrix profile for a given dimension (i.e., the first column is
60-
the 1-D matrix profile and the second column is the 2-D matrix profile).
58+
The multi-dimensional matrix profile. Each row of the array corresponds
59+
to each matrix profile for a given dimension (i.e., the first row is
60+
the 1-D matrix profile and the second row is the 2-D matrix profile).
6161
6262
I : ndarray
63-
The multi-dimensional matrix profile index where each column of the array
63+
The multi-dimensional matrix profile index where each row of the array
6464
corresponds to each matrix profile index for a given dimension.
6565
6666
Notes
@@ -167,4 +167,4 @@ def mstumped(dask_client, T, m, include=None, discords=False):
167167
for future in futures:
168168
dask_client.cancel(future)
169169

170-
return P.T, I.T
170+
return P, I

tests/naive.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -292,7 +292,7 @@ def mstump(T, m, excl_zone, include=None, discords=False):
292292
P[dim, col_mask] = P_i[dim, col_mask]
293293
I[dim, col_mask] = I_i[dim, col_mask]
294294

295-
return P.T, I.T
295+
return P, I
296296

297297

298298
def subspace(T, m, motif_idx, nn_idx, k, include=None, discords=False):

tests/test_mstump.py

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -85,8 +85,8 @@ def test_query_mstump_profile(T, m):
8585
excl_zone = int(np.ceil(m / 4))
8686
for query_idx in range(T.shape[0] - m + 1):
8787
ref_P, ref_I = naive.mstump(T, m, excl_zone)
88-
ref_P = ref_P[query_idx, :]
89-
ref_I = ref_I[query_idx, :]
88+
ref_P = ref_P[:, query_idx]
89+
ref_I = ref_I[:, query_idx]
9090

9191
M_T, Σ_T = core.compute_mean_std(T, m)
9292
comp_P, comp_I = _query_mstump_profile(
@@ -103,8 +103,8 @@ def test_get_first_mstump_profile(T, m):
103103
start = 0
104104

105105
ref_P, ref_I = naive.mstump(T, m, excl_zone)
106-
ref_P = ref_P[start, :]
107-
ref_I = ref_I[start, :]
106+
ref_P = ref_P[:, start]
107+
ref_I = ref_I[:, start]
108108

109109
M_T, Σ_T = core.compute_mean_std(T, m)
110110
comp_P, comp_I = _get_first_mstump_profile(
@@ -161,29 +161,29 @@ def test_subspace_include(T, m):
161161

162162
@pytest.mark.parametrize("T, m", test_data)
163163
def test_subspace_discords(T, m):
164-
motif_idx = 1
164+
discord_idx = 1
165165
nn_idx = 4
166166

167167
for k in range(T.shape[0]):
168-
ref_S = naive.subspace(T, m, motif_idx, nn_idx, k, discords=True)
169-
comp_S = _get_subspace(T, m, motif_idx, nn_idx, k, discords=True)
168+
ref_S = naive.subspace(T, m, discord_idx, nn_idx, k, discords=True)
169+
comp_S = _get_subspace(T, m, discord_idx, nn_idx, k, discords=True)
170170
npt.assert_almost_equal(ref_S, comp_S)
171171

172172

173173
@pytest.mark.parametrize("T, m", test_data)
174174
def test_subspace_include_discords(T, m):
175-
motif_idx = 1
175+
discord_idx = 1
176176
nn_idx = 4
177177
for width in range(T.shape[0]):
178178
for i in range(T.shape[0] - width):
179179
include = np.asarray(range(i, i + width + 1))
180180

181181
for k in range(T.shape[0]):
182182
ref_S = naive.subspace(
183-
T, m, motif_idx, nn_idx, k, include, discords=True
183+
T, m, discord_idx, nn_idx, k, include, discords=True
184184
)
185185
comp_S = _get_subspace(
186-
T, m, motif_idx, nn_idx, k, include, discords=True
186+
T, m, discord_idx, nn_idx, k, include, discords=True
187187
)
188188
npt.assert_almost_equal(ref_S, comp_S)
189189

@@ -195,8 +195,8 @@ def test_naive_mstump():
195195
zone = int(np.ceil(m / 4))
196196

197197
ref_mp = naive.stamp(T[0], m, exclusion_zone=zone)
198-
ref_P = ref_mp[np.newaxis, :, 0].T
199-
ref_I = ref_mp[np.newaxis, :, 1].T
198+
ref_P = ref_mp[np.newaxis, :, 0]
199+
ref_I = ref_mp[np.newaxis, :, 1]
200200

201201
comp_P, comp_I = naive.mstump(T, m, zone)
202202

0 commit comments

Comments
 (0)