Source code for stumpy.mstumped

# STUMPY# Copyright 2019 TD Ameritrade. Released under the terms of the 3-Clause BSD license.# STUMPY is a trademark of TD Ameritrade IP Company, Inc. All rights reserved.importmathimportnumpyasnpfrom.importconfig,corefrom.maampedimportmaampedfrom.mmparrayimportmparrayfrom.mstumpimport_get_first_mstump_profile,_get_multi_QT,_mstumpdef_dask_mstumped(dask_client,T_A,T_B,m,excl_zone,M_T,Σ_T,μ_Q,σ_Q,T_subseq_isconstant,Q_subseq_isconstant,include,discords,):"""    Compute the multi-dimensional z-normalized matrix profile with a `dask` cluster    This is a highly distributed implementation around the Numba JIT-compiled    parallelized `_mstump` function which computes the multi-dimensional matrix    profile according to STOMP. Note that only self-joins are supported.    Parameters    ----------    dask_client : client        A ``dask`` client. Setting up a ``dask`` cluster is beyond        the scope of this library. Please refer to the ``dask``        documentation.    T_A : numpy.ndarray        The time series or sequence for which to compute the multi-dimensional        matrix profile. Each row in `T_A` represents data from the same        dimension while each column in `T_A` represents data from a different        dimension.    T_B : numpy.ndarray        The time series or sequence that will be used to annotate T_A. For every        subsequence in T_A, its nearest neighbor in T_B will be recorded.    m : int        Window size    excl_zone : int        The half width for the exclusion zone relative to the current        sliding window    M_T : numpy.ndarray        Sliding mean of time series, `T`    Σ_T : numpy.ndarray        Sliding standard deviation of time series, `T`    μ_Q : numpy.ndarray        Mean of the query sequence, `Q`, relative to the current sliding window    σ_Q : numpy.ndarray        Standard deviation of the query sequence, `Q`, relative to the current        sliding window    T_subseq_isconstant : numpy.ndarray        A boolearn array representing Rolling isconstant for `T`    Q_subseq_isconstant : numpy.ndarray        A boolearn array representing Rolling isconstant for `Q`    include : numpy.ndarray        A list of (zero-based) indices corresponding to the dimensions in `T` that        must be included in the constrained multidimensional motif search.        For more information, see Section IV D in:        `DOI: 10.1109/ICDM.2017.66 \        <https://www.cs.ucr.edu/~eamonn/Motif_Discovery_ICDM.pdf>`__    discords : bool        When set to `True`, this reverses the distance profile to favor discords rather        than motifs. Note that indices in `include` are still maintained and respected.    Returns    -------    P : numpy.ndarray        The multi-dimensional matrix profile. Each row of the array corresponds        to each matrix profile for a given dimension (i.e., the first row is        the 1-D matrix profile and the second row is the 2-D matrix profile).    I : numpy.ndarray        The multi-dimensional matrix profile index where each row of the array        corresponds to each matrix profile index for a given dimension.    """d,n=T_B.shapel=n-m+1P=np.empty((d,l),dtype=np.float64)I=np.empty((d,l),dtype=np.int64)hosts=list(dask_client.ncores().keys())nworkers=len(hosts)step=int(math.ceil(l/nworkers))forstartinrange(0,l,step):P[:,start],I[:,start]=_get_first_mstump_profile(start,T_A,T_B,m,excl_zone,M_T,Σ_T,μ_Q,σ_Q,T_subseq_isconstant,Q_subseq_isconstant,include,discords,)# Scatter data to Dask clusterT_A_future=dask_client.scatter(T_A,broadcast=True,hash=False)M_T_future=dask_client.scatter(M_T,broadcast=True,hash=False)Σ_T_future=dask_client.scatter(Σ_T,broadcast=True,hash=False)μ_Q_future=dask_client.scatter(μ_Q,broadcast=True,hash=False)σ_Q_future=dask_client.scatter(σ_Q,broadcast=True,hash=False)T_subseq_isconstant_future=dask_client.scatter(T_subseq_isconstant,broadcast=True,hash=False)Q_subseq_isconstant_future=dask_client.scatter(Q_subseq_isconstant,broadcast=True,hash=False)QT_futures=[]QT_first_futures=[]fori,startinenumerate(range(0,l,step)):QT,QT_first=_get_multi_QT(start,T_A,m)QT_future=dask_client.scatter(QT,workers=[hosts[i]],hash=False)QT_first_future=dask_client.scatter(QT_first,workers=[hosts[i]],hash=False)QT_futures.append(QT_future)QT_first_futures.append(QT_first_future)futures=[]fori,startinenumerate(range(0,l,step)):stop=min(l,start+step)futures.append(dask_client.submit(_mstump,T_A_future,m,stop,excl_zone,M_T_future,Σ_T_future,QT_futures[i],QT_first_futures[i],μ_Q_future,σ_Q_future,T_subseq_isconstant_future,Q_subseq_isconstant_future,l,start+1,include,discords,))results=dask_client.gather(futures)fori,startinenumerate(range(0,l,step)):stop=min(l,start+step)P[:,start+1:stop],I[:,start+1:stop]=results[i]returnP,Idef_ray_mstumped(ray_client,T_A,T_B,m,excl_zone,M_T,Σ_T,μ_Q,σ_Q,T_subseq_isconstant,Q_subseq_isconstant,include,discords,):"""    Compute the multi-dimensional z-normalized matrix profile with a `ray` cluster    This is a highly distributed implementation around the Numba JIT-compiled    parallelized `_mstump` function which computes the multi-dimensional matrix    profile according to STOMP. Note that only self-joins are supported.    Parameters    ----------    ray_client : client        A `ray` client. Setting up a cluster is beyond the scope of this library.        Please refer to the `ray` documentation.    T_A : numpy.ndarray        The time series or sequence for which to compute the multi-dimensional        matrix profile. Each row in `T_A` represents data from the same        dimension while each column in `T_A` represents data from a different        dimension.    T_B : numpy.ndarray        The time series or sequence that will be used to annotate T_A. For every        subsequence in T_A, its nearest neighbor in T_B will be recorded.    m : int        Window size    excl_zone : int        The half width for the exclusion zone relative to the current        sliding window    M_T : numpy.ndarray        Sliding mean of time series, `T`    Σ_T : numpy.ndarray        Sliding standard deviation of time series, `T`    μ_Q : numpy.ndarray        Mean of the query sequence, `Q`, relative to the current sliding window    σ_Q : numpy.ndarray        Standard deviation of the query sequence, `Q`, relative to the current        sliding window    T_subseq_isconstant : numpy.ndarray        A boolearn array representing Rolling isconstant for `T`    Q_subseq_isconstant : numpy.ndarray        A boolearn array representing Rolling isconstant for `Q`    include : numpy.ndarray        A list of (zero-based) indices corresponding to the dimensions in `T` that        must be included in the constrained multidimensional motif search.        For more information, see Section IV D in:        `DOI: 10.1109/ICDM.2017.66 \        <https://www.cs.ucr.edu/~eamonn/Motif_Discovery_ICDM.pdf>`__    discords : bool        When set to `True`, this reverses the distance profile to favor discords rather        than motifs. Note that indices in `include` are still maintained and respected.    Returns    -------    P : numpy.ndarray        The multi-dimensional matrix profile. Each row of the array corresponds        to each matrix profile for a given dimension (i.e., the first row is        the 1-D matrix profile and the second row is the 2-D matrix profile).    I : numpy.ndarray        The multi-dimensional matrix profile index where each row of the array        corresponds to each matrix profile index for a given dimension.    """core.check_ray(ray_client)d,n=T_B.shapel=n-m+1P=np.empty((d,l),dtype=np.float64)I=np.empty((d,l),dtype=np.int64)nworkers=core.get_ray_nworkers(ray_client)step=int(math.ceil(l/nworkers))forstartinrange(0,l,step):P[:,start],I[:,start]=_get_first_mstump_profile(start,T_A,T_B,m,excl_zone,M_T,Σ_T,μ_Q,σ_Q,T_subseq_isconstant,Q_subseq_isconstant,include,discords,)# Put data into Ray object storageT_A_ref=ray_client.put(T_A)M_T_ref=ray_client.put(M_T)Σ_T_ref=ray_client.put(Σ_T)μ_Q_ref=ray_client.put(μ_Q)σ_Q_ref=ray_client.put(σ_Q)T_subseq_isconstant_ref=ray_client.put(T_subseq_isconstant)Q_subseq_isconstant_ref=ray_client.put(Q_subseq_isconstant)QT_refs=[]QT_first_refs=[]forstartinrange(0,l,step):QT,QT_first=_get_multi_QT(start,T_A,m)QT_ref=ray_client.put(QT)QT_first_ref=ray_client.put(QT_first)QT_refs.append(QT_ref)QT_first_refs.append(QT_first_ref)ray_mstump_func=ray_client.remote(core.deco_ray_tor(_mstump))refs=[]fori,startinenumerate(range(0,l,step)):stop=min(l,start+step)refs.append(ray_mstump_func.remote(T_A_ref,m,stop,excl_zone,M_T_ref,Σ_T_ref,QT_refs[i],QT_first_refs[i],μ_Q_ref,σ_Q_ref,T_subseq_isconstant_ref,Q_subseq_isconstant_ref,l,start+1,include,discords,))results=ray_client.get(refs)fori,startinenumerate(range(0,l,step)):stop=min(l,start+step)P[:,start+1:stop],I[:,start+1:stop]=results[i]returnP,I[docs]@core.non_normalized(maamped,exclude=["normalize","T_subseq_isconstant"],)defmstumped(client,T,m,include=None,discords=False,p=2.0,normalize=True,T_subseq_isconstant=None,):"""    Compute the multi-dimensional z-normalized matrix profile with a    ``dask``/``ray`` cluster    This is a highly distributed implementation around the Numba JIT-compiled    parallelized ``_mstump`` function which computes the multi-dimensional matrix    profile according to STOMP. Note that only self-joins are supported.    Parameters    ----------    client : client        A ``dask``/``ray`` client. Setting up a cluster is beyond the scope of this        library. Please refer to the ``dask``/``ray`` documentation.    T : numpy.ndarray        The time series or sequence for which to compute the multi-dimensional        matrix profile. Each row in ``T`` represents data from the same        dimension while each column in ``T`` represents data from a different        dimension.    m : int        Window size.    include : list, numpy.ndarray, default None        A list of (zero-based) indices corresponding to the dimensions in ``T`` that        must be included in the constrained multidimensional motif search.        For more information, see Section IV D in:        `DOI: 10.1109/ICDM.2017.66 \        <https://www.cs.ucr.edu/~eamonn/Motif_Discovery_ICDM.pdf>`__    discords : bool, default False        When set to ``True``, this reverses the distance matrix which results in a        multi-dimensional matrix profile that favors larger matrix profile values        (i.e., discords) rather than smaller values (i.e., motifs). Note that indices        in `include` are still maintained and respected.    p : float, default 2.0        The p-norm to apply for computing the Minkowski distance. Minkowski distance is        typically used with ``p`` being ``1`` or ``2``, which correspond to the        Manhattan distance and the Euclidean distance, respectively.    normalize : bool, default True        When set to ``True``, this z-normalizes subsequences prior to computing        distances. Otherwise, this function gets re-routed to its complementary        non-normalized equivalent set in the ``@core.non_normalized`` function        decorator.    T_subseq_isconstant : numpy.ndarray, function, or list, default None        A parameter that is used to show whether a subsequence of a time series in ``T``        is constant (``True``) or not. ``T_subseq_isconstant`` can be a 2D boolean        ``numpy.ndarray`` or a function that can be applied to each time series in        ``T``. Alternatively, for maximum flexibility, a list (with length equal to the        total number of time series) may also be used. In this case,        ``T_subseq_isconstant[i]`` corresponds to the ``i``-th time series ``T[i]`` and        each element in the list can either be a 1D boolean ``numpy.ndarray``, a        function, or ``None``.    Returns    -------    P : numpy.ndarray        The multi-dimensional matrix profile. Each row of the array corresponds        to each matrix profile for a given dimension (i.e., the first row is        the 1-D matrix profile and the second row is the 2-D matrix profile).    I : numpy.ndarray        The multi-dimensional matrix profile index where each row of the array        corresponds to each matrix profile index for a given dimension.    See Also    --------    stumpy.mstump : Compute the multi-dimensional z-normalized matrix profile    stumpy.subspace : Compute the k-dimensional matrix profile subspace for a given        subsequence index and its nearest neighbor index    stumpy.mdl : Compute the number of bits needed to compress one array with another        using the minimum description length (MDL)    Notes    -----    `DOI: 10.1109/ICDM.2017.66 \    <https://www.cs.ucr.edu/~eamonn/Motif_Discovery_ICDM.pdf>`__    See mSTAMP Algorithm    Examples    --------    >>> import stumpy    >>> import numpy as np    >>> from dask.distributed import Client    >>> if __name__ == "__main__":    ...     with Client() as dask_client:    ...         stumpy.mstumped(    ...             dask_client,    ...             np.array([[584., -11., 23., 79., 1001., 0., -19.],    ...                       [  1.,   2.,  4.,  8.,   16., 0.,  32.]]),    ...             m=3)    (array([[0.        , 1.43947142, 0.        , 2.69407392, 0.11633857],            [0.777905  , 2.36179922, 1.50004632, 2.92246722, 0.777905  ]]),     array([[2, 4, 0, 1, 0],            [4, 4, 0, 1, 0]]))    Alternatively, you can also use `ray`    >>> import ray    >>> if __name__ == "__main__":    >>>     ray.init()    >>>     stumpy.mstumped(    ...         ray,    ...         np.array([[584., -11., 23., 79., 1001., 0., -19.],    ...                   [  1.,   2.,  4.,  8.,   16., 0.,  32.]]),    ...         m=3)    >>>     ray.shutdown()    """T_A=TT_B=T_AT_A=core._preprocess(T_A)T_B=core._preprocess(T_B)T_A_subseq_isconstant=T_subseq_isconstantT_A_subseq_isconstant=core.process_isconstant(T_A,m,T_A_subseq_isconstant)T_B_subseq_isconstant=T_A_subseq_isconstantT_A,M_T,Σ_T,T_subseq_isconstant=core.preprocess(T_A,m,T_subseq_isconstant=T_A_subseq_isconstant)T_B,μ_Q,σ_Q,Q_subseq_isconstant=core.preprocess(T_B,m,T_subseq_isconstant=T_B_subseq_isconstant)ifT_A.ndim<=1:# pragma: no covererr=f"T is{T_A.ndim}-dimensional and must be at least 2-dimensional"raiseValueError(f"{err}")# mstump currently only supports self-join. Therefore, the argument `n=T_A.shape[1]`# must be passed to the function `core.check_window_size`.core.check_window_size(m,max_size=min(T_A.shape[1],T_B.shape[1]),n=T_A.shape[1])ifincludeisnotNone:include=core._preprocess_include(include)excl_zone=int(np.ceil(m/config.STUMPY_EXCL_ZONE_DENOM))# See Definition 3 and Figure 3_mstumped=core._client_to_func(client)P,I=_mstumped(client,T_A,T_B,m,excl_zone,M_T,Σ_T,μ_Q,σ_Q,T_subseq_isconstant,Q_subseq_isconstant,include,discords,)returnmparray(P_=P,I_=I)
Movatterモバイル変換

Source code for stumpy.mstumped