@@ -564,13 +564,10 @@ def prescrump(T_A, m, T_B=None, s=None):
564564 return P , I
565565
566566
567- def scrump (
568- T_A , m , T_B = None , ignore_trivial = True , percentage = 0.01 , pre_scrump = False , s = None
569- ):
567+ class scrump (object ):
570568 """
571- Compute the approximate matrix profile with parallelized SCRIMP. This returns a
572- generator that can be incrementally iterated on. For SCRIMP++, set
573- `pre_scrump=True`.
569+ Compute the approximate matrix profile with the parallelized SCRIMP algorithm. For
570+ SCRIMP++, set `pre_scrump=True`.
574571
575572 This is a convenience wrapper around the Numba JIT-compiled parallelized
576573 `_scrump` function which computes the matrix profile according to SCRIMP.
@@ -604,10 +601,19 @@ def scrump(
604601 then `s` will automatically be set to `s=int(np.ceil(m/4))`, the size of
605602 the exclusion zone.
606603
607- Returns
604+ Attributes
605+ ----------
606+ P_ : ndarray
607+ The updated matrix profile
608+
609+ I_ : ndarray
610+ The updated matrix profile indices
611+
612+ Methods
608613 -------
609- out : ndarray
610- Matrix profile and matrix profile indices
614+ update()
615+ Update the matrix profile and matrix profile indices by computing additional
616+ (as defined by `percentage`) new distances that make up the full distance matrix
611617
612618 Notes
613619 -----
@@ -616,85 +622,177 @@ def scrump(
616622
617623 See Algorithm 1 and Algorithm 2
618624 """
619- if T_B is None :
620- T_B = T_A
621- ignore_trivial = True
622-
623- T_A , M_T , Σ_T = core .preprocess (T_A , m )
624- T_B , μ_Q , σ_Q = core .preprocess (T_B , m )
625625
626- if T_A .ndim != 1 : # pragma: no cover
627- raise ValueError (
628- f"T_A is { T_A .ndim } -dimensional and must be 1-dimensional. "
629- "For multidimensional STUMP use `stumpy.mstump` or `stumpy.mstumped`"
630- )
def __init__(
    self,
    T_A,
    m,
    T_B=None,
    ignore_trivial=True,
    percentage=0.01,
    pre_scrump=False,
    s=None,
):
    """
    Initialize the `scrump` object

    Parameters
    ----------
    T_A : ndarray
        The time series or sequence for which to compute the matrix profile

    m : int
        Window size

    T_B : ndarray
        The time series or sequence that contain your query subsequences
        of interest. Default is `None`, in which case `T_A` is used and
        `ignore_trivial` is forced to `True` (i.e., a self-join).

    ignore_trivial : bool
        Set to `True` if this is a self-join. Otherwise, for AB-join, set this to
        `False`. Default is `True`.

    percentage : float
        Approximate percentage completed by each call to `update()`. The value
        is clamped to the closed interval [0.0, 1.0]. A value of `0.0` (or less)
        results in zero chunks, so `update()` becomes a no-op.

    pre_scrump : bool
        A flag for whether or not to perform the PreSCRIMP calculation prior to
        computing SCRIMP. If set to `True`, this is equivalent to computing
        SCRIMP++

    s : int
        The size of the PreSCRIMP fixed interval. If `pre_scrump=True` and
        `s=None`, then `s` will automatically be set to `s=int(np.ceil(m/4))`,
        the size of the exclusion zone.

    Raises
    ------
    ValueError
        If either (preprocessed) time series is not 1-dimensional
    """
    self._ignore_trivial = ignore_trivial

    if T_B is None:
        # No second series was supplied, so this is a self-join
        T_B = T_A
        self._ignore_trivial = True

    self._m = m
    self._T_A, self._M_T, self._Σ_T = core.preprocess(T_A, self._m)
    self._T_B, self._μ_Q, self._σ_Q = core.preprocess(T_B, self._m)

    if self._T_A.ndim != 1:  # pragma: no cover
        raise ValueError(
            f"T_A is {self._T_A.ndim}-dimensional and must be 1-dimensional. "
            "For multidimensional STUMP use `stumpy.mstump` or `stumpy.mstumped`"
        )

    if self._T_B.ndim != 1:  # pragma: no cover
        raise ValueError(
            f"T_B is {self._T_B.ndim}-dimensional and must be 1-dimensional. "
            "For multidimensional STUMP use `stumpy.mstump` or `stumpy.mstumped`"
        )

    core.check_dtype(self._T_A)
    core.check_dtype(self._T_B)

    core.check_window_size(self._m)

    # Compare the arrays once and reuse the answer for both sanity warnings
    arrays_equal = core.are_arrays_equal(self._T_A, self._T_B)

    if self._ignore_trivial is False and arrays_equal:  # pragma: no cover
        logger.warning("Arrays T_A, T_B are equal, which implies a self-join.")
        logger.warning("Try setting `ignore_trivial = True`.")

    if self._ignore_trivial and arrays_equal is False:  # pragma: no cover
        logger.warning("Arrays T_A, T_B are not equal, which implies an AB-join.")
        logger.warning("Try setting `ignore_trivial = False`.")

    self._n_A = self._T_A.shape[0]
    self._n_B = self._T_B.shape[0]
    self._l = self._n_B - self._m + 1

    # Start from "nothing found yet": infinite distances and invalid indices
    self._P = np.empty(self._l, dtype=np.float64)
    self._I = np.empty(self._l, dtype=np.int64)
    self._P[:] = np.inf
    self._I[:] = -1

    self._excl_zone = int(np.ceil(self._m / 4))

    if s is None:
        s = self._excl_zone

    if pre_scrump:
        # NOTE(review): the self-join branch hands `prescrump` the preprocessed
        # series while the AB-join branch hands it the raw inputs - confirm
        # that this asymmetry is intended.
        if self._ignore_trivial:
            P, I = prescrump(self._T_A, self._m, s=s)
        else:
            P, I = prescrump(T_A, m, T_B=T_B, s=s)
        # Seed the profile with every PreSCRIMP distance that is an improvement
        for i in range(P.shape[0]):
            if self._P[i] > P[i]:
                self._P[i] = P[i]
                self._I[i] = I[i]

    # Randomized order in which `update()` will consume the candidate `orders`
    if self._ignore_trivial:
        self._orders = np.random.permutation(
            range(self._excl_zone + 1, self._n_B - self._m + 1)
        )
    else:
        self._orders = np.random.permutation(
            range(-(self._n_B - self._m + 1) + 1, self._n_A - self._m + 1)
        )

    self._n_threads = config.NUMBA_NUM_THREADS

    # Clamp to [0.0, 1.0]. Both bounds must be applied to the same value
    # (previously the lower-bound clamp discarded the upper-bound clamp) and
    # the chunk count must be derived from the clamped value.
    self._percentage = min(max(percentage, 0.0), 1.0)
    if self._percentage > 0.0:
        self._n_chunks = int(np.ceil(1.0 / self._percentage))
    else:
        # Nothing to compute per update; also avoids dividing by zero
        self._n_chunks = 0
    self._chunk = 1
    self._start = 0
def update(self):
    """
    Update the matrix profile and matrix profile indices by computing additional
    (as defined by `percentage`) new distances that make up the full distance matrix

    Once all chunks have been processed, calling this method does nothing.
    """
    if self._chunk > self._n_chunks:
        # Every chunk has already been consumed
        return

    chunk_ranges = _get_orders_ranges(
        self._n_threads,
        self._m,
        self._n_A,
        self._n_B,
        self._orders,
        self._start,
        self._percentage,
    )

    new_P, new_I = _scrump(
        self._T_A,
        self._T_B,
        self._m,
        self._M_T,
        self._Σ_T,
        self._μ_Q,
        self._σ_Q,
        self._orders,
        chunk_ranges,
        self._ignore_trivial,
    )

    # The next call resumes where the furthest-reaching range stopped
    self._start = chunk_ranges[:, 1].max()

    # Keep a new distance (and its index) only where it beats the stored one
    n = self._P.shape[0]
    improved = self._P > new_P[:n]
    self._P[improved] = new_P[:n][improved]
    self._I[improved] = new_I[:n][improved]

    self._chunk += 1
785+
@property
def P_(self):
    """
    Get the updated matrix profile

    Returns
    -------
    ndarray
        A `np.float64` copy of the current matrix profile; modifying the
        returned array does not affect the stored profile.
    """
    # `np.float` was merely an alias of the builtin `float` (i.e., float64),
    # deprecated in NumPy 1.20 and removed in NumPy 1.24
    return self._P.astype(np.float64)
792+
@property
def I_(self):
    """
    Get the updated matrix profile indices

    Returns
    -------
    ndarray
        A `np.int64` copy of the current matrix profile indices; modifying the
        returned array does not affect the stored indices.
    """
    # `np.int` was merely an alias of the builtin `int`, deprecated in
    # NumPy 1.20 and removed in NumPy 1.24. `np.int64` matches the dtype the
    # indices are allocated with in `__init__`.
    return self._I.astype(np.int64)
0 commit comments