| Signature | Description | Parameters |
|---|---|---|
#include<DataFrame/DataFrameStatsVisitors.h>template<typename T,typename I=unsignedlong,typename Cmp=std::less<T>>struct ExtremumSubArrayVisitor;// -------------------------------------template<typename T,typename I=unsignedlong>using MaxSubArrayVisitor= ExtremumSubArrayVisitor<T, I,std::less<T>>;template<typename T,typename I=unsignedlong>using MinSubArrayVisitor= ExtremumSubArrayVisitor<T, I,std::greater<T>>; | This functor finds the subinterval within the given column that contains elements with maximum or minimum (depending onCmp. The default is maximum) sum than any other interval. It does it in O(n). In the constructor, the user can specify maximum and minimum values to consider. The default is to consider all values. get_result() returns the maximum sum. get_begin_idx() returns the beginning index of the interval get_end_idx() returns the end (exclusive) index of the interval explicit ExtremumSubArrayVisitor(T min_to_consider = -std::numeric_limits<T>::max(), T max_to_consider = std::numeric_limits<T>::max()); | T: Column data type. T must be an arithmetic-enabled type I: Index type. |
#include<DataFrame/DataFrameStatsVisitors.h>template<std::size_t N,typename T,typename I=unsignedlong,typename Cmp=std::less<T>,std::size_t A=0>struct NExtremumSubArrayVisitor;// -------------------------------------template<std::size_t N,typename T,typename I,std::size_t A=0>using NMaxSubArrayVisitor= NExtremumSubArrayVisitor<N, T, I,std::less<T>, A>;template<std::size_t N,typename T,typename I,std::size_t A=0>using NMinSubArrayVisitor= NExtremumSubArrayVisitor<N, T, I,std::greater<T>, A>; | This functor is similar to the aboveExtremumSubArrayVisitor, but it finds at most N subintervals within the given column that contain elements with maximum or minimum sum than any other interval. It does it in O(n + kNlog(N)); wheren = column length,k = a multiplier less thann,N = number of max sums to find The reason these two visitors are separated is because of performance complexity get_result() returns a std::vector of the following struct sortedCmp parameter. struct SubArrayInfo { T sum { }; std::size_t begin_index { 0 }; std::size_t end_index { 0 }; }; explicit NExtremumSubArrayVisitor(T min_to_consider = -std::numeric_limits<T>::max(), T max_to_consider = std::numeric_limits<T>::max()); | T: Column data type. T must be an arithmetic-enabled type I: Index type A: Memory alignment boundary for vectors. Default is system default alignment |
staticvoid test_ExtremumSubArrayVisitor(){std::cout<<"\nTesting ExtremumSubArrayVisitor{ } ..."<<std::endl;std::vector<unsignedlong> idx={123450,123451,123452,123453,123454,123455,123456,123457,123458,123459,123460,123461,123462,123466,123467,123468,123469,123470,123471,123472,123473,123467,123468,123469,123470,123471,123472,123473,123467,123468,123469,123470,123471,123472,123473,}; MyDataFrame df; df.load_index(std::move(idx)); df.load_column<double>("dbl_col1",{1,2,3,4,5,6,7,8,9,10}, nan_policy::dont_pad_with_nans); df.load_column<double>("dbl_col2",{-3,1,-8,4,-1,2,1,-5,5}, nan_policy::dont_pad_with_nans); ExtremumSubArrayVisitor<double> msa_v; df.visit<double>("dbl_col1", msa_v); assert(msa_v.get_result()==55); assert(msa_v.get_begin_idx()==0); assert(msa_v.get_end_idx()==10); df.visit<double>("dbl_col2", msa_v); assert(msa_v.get_result()==6); assert(msa_v.get_begin_idx()==3); assert(msa_v.get_end_idx()==7);}// -----------------------------------------------------------------------------staticvoid test_NExtremumSubArrayVisitor(){std::cout<<"\nTesting NExtremumSubArrayVisitor{ } ..."<<std::endl;std::vector<unsignedlong> idx={123450,123451,123452,123453,123454,123455,123456,123457,123458,123459,123460,123461,123462,123466,123467,123468,123469,123470,123471,123472,123473,123467,123468,123469,123470,123471,123472,123473,123467,123468,123469,123470,123471,123472,123473,}; MyDataFrame df; df.load_index(std::move(idx)); df.load_column<double>("dbl_col1",{1,2,3,4,5,6,7,8,9,10}, nan_policy::dont_pad_with_nans); df.load_column<double>("dbl_col2",{-3,1,-8,4,-1,2,1,-5,5}, nan_policy::dont_pad_with_nans); NExtremumSubArrayVisitor<5,double> msa_v; df.visit<double>("dbl_col1", msa_v); assert((msa_v.get_result()[0].sum==21)); assert((msa_v.get_result()[0].begin_index==0)); assert((msa_v.get_result()[0].end_index==6)); assert((msa_v.get_result()[1].sum==28)); assert((msa_v.get_result()[1].begin_index==0)); assert((msa_v.get_result()[1].end_index==7)); assert((msa_v.get_result()[2].sum==36)); assert((msa_v.get_result()[2].begin_index==0)); assert((msa_v.get_result()[2].end_index==8)); assert((msa_v.get_result()[3].sum==45)); assert((msa_v.get_result()[3].begin_index==0)); assert((msa_v.get_result()[3].end_index==9)); assert((msa_v.get_result()[4].sum==55)); assert((msa_v.get_result()[4].begin_index==0)); assert((msa_v.get_result()[4].end_index==10)); NExtremumSubArrayVisitor<4,double> msa_v2; df.visit<double>("dbl_col2", msa_v2); assert((msa_v2.get_result()[0].sum==1)); assert((msa_v2.get_result()[0].begin_index==1)); assert((msa_v2.get_result()[0].end_index==2)); assert((msa_v2.get_result()[1].sum==4)); assert((msa_v2.get_result()[1].begin_index==3)); assert((msa_v2.get_result()[1].end_index==4)); assert((msa_v2.get_result()[2].sum==5)); assert((msa_v2.get_result()[2].begin_index==3)); assert((msa_v2.get_result()[2].end_index==6)); assert((msa_v2.get_result()[3].sum==6)); assert((msa_v2.get_result()[3].begin_index==3)); assert((msa_v2.get_result()[3].end_index==7)); NExtremumSubArrayVisitor<5,double,unsignedlong,std::greater<double>> msa_v3; df.visit<double>("dbl_col1", msa_v3); assert((msa_v3.get_result()[0].sum==1)); assert((msa_v3.get_result()[0].begin_index==0)); assert((msa_v3.get_result()[0].end_index==1)); df.visit<double>("dbl_col2", msa_v3); assert((msa_v3.get_result()[0].sum==-3)); assert((msa_v3.get_result()[0].begin_index==0)); assert((msa_v3.get_result()[0].end_index==1)); assert((msa_v3.get_result()[1].sum==-10)); assert((msa_v3.get_result()[1].begin_index==0)); assert((msa_v3.get_result()[1].end_index==3));}