Skip to content

Commit f324dec

Browse files
committed
Fixed #205 Converted SCRUMP to OOP
1 parent c289836 commit f324dec

File tree

3 files changed

+396
-191
lines changed

3 files changed

+396
-191
lines changed

docs/Tutorial_Fast_Approximate_Matrix_Profiles.ipynb

Lines changed: 49 additions & 29 deletions
Large diffs are not rendered by default.

stumpy/scrump.py

Lines changed: 175 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -564,13 +564,10 @@ def prescrump(T_A, m, T_B=None, s=None):
564564
return P, I
565565

566566

567-
def scrump(
568-
T_A, m, T_B=None, ignore_trivial=True, percentage=0.01, pre_scrump=False, s=None
569-
):
567+
class scrump(object):
570568
"""
571-
Compute the approximate matrix profile with parallelized SCRIMP. This returns a
572-
generator that can be incrementally iterated on. For SCRIMP++, set
573-
`pre_scrump=True`.
569+
Compute the approximate matrix profile with the parallelized SCRIMP algorithm. For
570+
SCRIMP++, set `pre_scrump=True`.
574571
575572
This is a convenience wrapper around the Numba JIT-compiled parallelized
576573
`_scrump` function which computes the matrix profile according to SCRIMP.
@@ -604,10 +601,19 @@ def scrump(
604601
then `s` will automatically be set to `s=int(np.ceil(m/4))`, the size of
605602
the exclusion zone.
606603
607-
Returns
604+
Attributes
605+
----------
606+
P_ : ndarray
607+
The updated matrix profile
608+
609+
I_ : ndarray
610+
The updated matrix profile indices
611+
612+
Methods
608613
-------
609-
out : ndarray
610-
Matrix profile and matrix profile indices
614+
update()
615+
Update the matrix profile and matrix profile indices by computing additional
616+
(as defined by `percentage`) new distances that make up the full distance matrix
611617
612618
Notes
613619
-----
@@ -616,85 +622,177 @@ def scrump(
616622
617623
See Algorithm 1 and Algorithm 2
618624
"""
619-
if T_B is None:
620-
T_B = T_A
621-
ignore_trivial = True
622-
623-
T_A, M_T, Σ_T = core.preprocess(T_A, m)
624-
T_B, μ_Q, σ_Q = core.preprocess(T_B, m)
625625

626-
if T_A.ndim != 1: # pragma: no cover
627-
raise ValueError(
628-
f"T_A is {T_A.ndim}-dimensional and must be 1-dimensional. "
629-
"For multidimensional STUMP use `stumpy.mstump` or `stumpy.mstumped`"
630-
)
626+
def __init__(
    self,
    T_A,
    m,
    T_B=None,
    ignore_trivial=True,
    percentage=0.01,
    pre_scrump=False,
    s=None,
):
    """
    Initialize the `scrump` object

    Parameters
    ----------
    T_A : ndarray
        The time series or sequence for which to compute the matrix profile

    m : int
        Window size

    T_B : ndarray
        The time series or sequence that contain your query subsequences
        of interest. If `None` (the default), `T_A` is used in its place and
        `ignore_trivial` is forced to `True`.

    ignore_trivial : bool
        Set to `True` if this is a self-join. Otherwise, for AB-join, set this to
        `False`. Default is `True`.

    percentage : float
        Approximate percentage completed. The value is between 0.0 and 1.0.
        Values outside of this interval are clamped to it.

    pre_scrump : bool
        A flag for whether or not to perform the PreSCRIMP calculation prior to
        computing SCRIMP. If set to `True`, this is equivalent to computing
        SCRIMP++

    s : int
        The size of the PreSCRIMP fixed interval. If `pre_scrump=True` and `s=None`,
        then `s` will automatically be set to `s=int(np.ceil(m/4))`, the size of
        the exclusion zone.

    Raises
    ------
    ValueError
        If `T_A` or `T_B` is not 1-dimensional after preprocessing
    """
    self._ignore_trivial = ignore_trivial

    # Without an explicit `T_B` this is a self-join on `T_A`
    if T_B is None:
        T_B = T_A
        self._ignore_trivial = True

    self._m = m
    self._T_A, self._M_T, self._Σ_T = core.preprocess(T_A, self._m)
    self._T_B, self._μ_Q, self._σ_Q = core.preprocess(T_B, self._m)

    if self._T_A.ndim != 1:  # pragma: no cover
        raise ValueError(
            f"T_A is {self._T_A.ndim}-dimensional and must be 1-dimensional. "
            "For multidimensional STUMP use `stumpy.mstump` or `stumpy.mstumped`"
        )

    if self._T_B.ndim != 1:  # pragma: no cover
        raise ValueError(
            f"T_B is {self._T_B.ndim}-dimensional and must be 1-dimensional. "
            "For multidimensional STUMP use `stumpy.mstump` or `stumpy.mstumped`"
        )

    core.check_dtype(self._T_A)
    core.check_dtype(self._T_B)

    core.check_window_size(self._m)

    # Warn (but do not fail) when `ignore_trivial` disagrees with the inputs
    if self._ignore_trivial is False and core.are_arrays_equal(
        self._T_A, self._T_B
    ):  # pragma: no cover
        logger.warning("Arrays T_A, T_B are equal, which implies a self-join.")
        logger.warning("Try setting `ignore_trivial = True`.")

    if (
        self._ignore_trivial
        and core.are_arrays_equal(self._T_A, self._T_B) is False
    ):  # pragma: no cover
        logger.warning("Arrays T_A, T_B are not equal, which implies an AB-join.")
        logger.warning("Try setting `ignore_trivial = False`.")

    self._n_A = self._T_A.shape[0]
    self._n_B = self._T_B.shape[0]
    self._l = self._n_B - self._m + 1

    # Start from the "nothing found yet" state: infinite distance, index -1
    self._P = np.empty(self._l, dtype=np.float64)
    self._I = np.empty(self._l, dtype=np.int64)
    self._P[:] = np.inf
    self._I[:] = -1

    self._excl_zone = int(np.ceil(self._m / 4))

    if s is None:
        s = self._excl_zone

    if pre_scrump:
        # NOTE(review): the self-join branch hands `prescrump` the preprocessed
        # `self._T_A` while the AB-join branch hands it the raw `T_A`/`T_B`.
        # Left unchanged here - confirm which form `prescrump` expects.
        if self._ignore_trivial:
            P, I = prescrump(self._T_A, self._m, s=s)
        else:
            P, I = prescrump(T_A, m, T_B=T_B, s=s)
        # Seed the profile with every PreSCRIMP distance that improves on it
        for i in range(P.shape[0]):
            if self._P[i] > P[i]:
                self._P[i] = P[i]
                self._I[i] = I[i]

    # Randomly permuted `orders` that `update` later hands, chunk by chunk, to
    # `_get_orders_ranges` and `_scrump`
    if self._ignore_trivial:
        self._orders = np.random.permutation(
            range(self._excl_zone + 1, self._n_B - self._m + 1)
        )
    else:
        self._orders = np.random.permutation(
            range(-(self._n_B - self._m + 1) + 1, self._n_A - self._m + 1)
        )

    self._n_threads = config.NUMBA_NUM_THREADS
    # Clamp to [0.0, 1.0] in a single expression. Applying `min` and `max` to the
    # raw argument in two separate assignments would discard the upper bound.
    # NOTE: a `percentage` of 0 is not guarded against and still fails in the
    # division below.
    self._percentage = max(min(percentage, 1.0), 0.0)
    # The number of `update` calls needed to cover everything must be derived
    # from the clamped value so that it agrees with `self._percentage`
    self._n_chunks = int(np.ceil(1.0 / self._percentage))
    self._chunk = 1
    self._start = 0
747+
748+
def update(self):
    """
    Refine the matrix profile and matrix profile indices by one more chunk, i.e.,
    by computing additional (as defined by `percentage`) new distances that make up
    the full distance matrix.

    Once `n_chunks` chunks have been processed, further calls leave the object
    unchanged.
    """
    # Every chunk has already been consumed, so there is nothing left to compute
    if self._chunk > self._n_chunks:
        return

    ranges = _get_orders_ranges(
        self._n_threads,
        self._m,
        self._n_A,
        self._n_B,
        self._orders,
        self._start,
        self._percentage,
    )

    chunk_P, chunk_I = _scrump(
        self._T_A,
        self._T_B,
        self._m,
        self._M_T,
        self._Σ_T,
        self._μ_Q,
        self._σ_Q,
        self._orders,
        ranges,
        self._ignore_trivial,
    )

    # The next call resumes where the furthest range of this chunk stopped
    self._start = ranges[:, 1].max()

    # Keep, per position, whichever distance is smaller along with its index
    for pos in range(self._P.shape[0]):
        if self._P[pos] > chunk_P[pos]:
            self._P[pos] = chunk_P[pos]
            self._I[pos] = chunk_I[pos]

    self._chunk += 1
785+
786+
@property
def P_(self):
    """
    Get the updated matrix profile

    Returns
    -------
    ndarray
        A `np.float64` copy of the current matrix profile
    """
    # `np.float` was only an alias of the builtin `float` and was removed in
    # NumPy 1.24; `np.float64` is the same dtype and works on every version
    return self._P.astype(np.float64)
792+
793+
@property
def I_(self):
    """
    Get the updated matrix profile indices

    Returns
    -------
    ndarray
        A `np.int64` copy of the current matrix profile indices
    """
    # `np.int` was only an alias of the builtin `int` (platform-dependent width)
    # and was removed in NumPy 1.24; `np.int64` matches how `_I` is allocated
    return self._I.astype(np.int64)

0 commit comments

Comments
 (0)