Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit bb525fe

Browse files
authored
Merge pull request #26865 from amane-ame:dxt_hal_rvv
Add RISC-V HAL implementation for cv::dft and cv::dct #26865

This patch implements the `static cv::DFT` function in RVV_HAL using native intrinsics, optimizing the performance of `cv::dft` and `cv::dct` for the data types `32FC1/64FC1/32FC2/64FC2`.

The reason I chose to create a new `cv_hal_dftOcv` interface is that if I were to use the existing interfaces (`cv_hal_dftInit1D` and `cv_hal_dft1D`), it would require handling and parsing the dft flags within HAL, as well as performing preprocessing operations such as handling unit roots. Since these operations are not performance hotspots and do not require optimization, reusing the existing interfaces would result in copying approximately 300 lines of code from `core/src/dxt.cpp` into HAL, which I believe is unnecessary.

Moreover, if I insert the new interface into `static cv::DFT`, both `static cv::RealDFT` and `static cv::DCT` can be optimized as well. The processing performed before and after calling `static cv::DFT` in these functions is also not a performance hotspot.

Tested on MUSE-PI (Spacemit X60) for both gcc 14.2 and clang 20.0.

```
$ opencv_test_core --gtest_filter="*DFT*"
$ opencv_perf_core --gtest_filter="*dft*:*dct*" --perf_min_samples=30 --perf_force_samples=30
```

The head of the perf table is shown below since the table is too long.
View the full perf table here: [hal_rvv_dxt.pdf](https://github.com/user-attachments/files/18622645/hal_rvv_dxt.pdf)

<img width="1017" alt="Untitled" src="https://github.com/user-attachments/assets/609856e7-9c7d-4a95-9923-45c1b77eb3a2" />

### Pull Request Readiness Checklist

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [x] I agree to contribute to the project under Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [ ] The PR is proposed to the proper branch
- [ ] There is a reference to the original bug report and related work
- [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable. Patch to opencv_extra has the same branch name.
- [ ] The feature is well documented and sample code can be built with the project CMake
1 parent 57a78cb commit bb525fe

File tree

7 files changed

+657
-61
lines changed

7 files changed

+657
-61
lines changed

‎3rdparty/hal_rvv/hal_rvv.hpp‎

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
#if defined(__riscv_v) && __riscv_v == 1000000
2323
#include"hal_rvv_1p0/merge.hpp"// core
2424
#include"hal_rvv_1p0/mean.hpp"// core
25+
#include"hal_rvv_1p0/dxt.hpp"// core
2526
#include"hal_rvv_1p0/norm.hpp"// core
2627
#include"hal_rvv_1p0/norm_diff.hpp"// core
2728
#include"hal_rvv_1p0/norm_hamming.hpp"// core

‎3rdparty/hal_rvv/hal_rvv_1p0/dxt.hpp‎

Lines changed: 569 additions & 0 deletions
Large diffs are not rendered by default.

‎3rdparty/hal_rvv/hal_rvv_1p0/minmax.hpp‎

Lines changed: 51 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -1,68 +1,65 @@
11
// This file is part of OpenCV project.
22
// It is subject to the license terms in the LICENSE file found in the top-level directory
33
// of this distribution and at http://opencv.org/license.html.
4-
#ifndefOPENCV_HAL_RVV_MINMAXIDX_HPP_INCLUDED
5-
#defineOPENCV_HAL_RVV_MINMAXIDX_HPP_INCLUDED
4+
#ifndefOPENCV_HAL_RVV_MINMAX_HPP_INCLUDED
5+
#defineOPENCV_HAL_RVV_MINMAX_HPP_INCLUDED
66

77
#include<riscv_vector.h>
88

9-
namespacecv {namespacecv_hal_rvv {
9+
namespacecv {namespacecv_hal_rvv {namespaceminmax {
1010

1111
#undef cv_hal_minMaxIdx
12-
#definecv_hal_minMaxIdx cv::cv_hal_rvv::minMaxIdx
12+
#definecv_hal_minMaxIdx cv::cv_hal_rvv::minmax::minMaxIdx
1313
#undef cv_hal_minMaxIdxMaskStep
14-
#definecv_hal_minMaxIdxMaskStep cv::cv_hal_rvv::minMaxIdx
14+
#definecv_hal_minMaxIdxMaskStep cv::cv_hal_rvv::minmax::minMaxIdx
1515

16-
namespace
17-
{
18-
template<typename T>structrvv;
16+
template<typename T>structrvv;
1917

20-
#defineHAL_RVV_GENERATOR(T, EEW, TYPE, IS_U, EMUL, M_EMUL, B_LEN) \
21-
template<>structrvv<T> \
22-
{ \
23-
usingvec_t = v##IS_U##int##EEW##EMUL##_t; \
24-
usingbool_t = vbool##B_LEN##_t; \
25-
staticinlinesize_tvsetvlmax() {return __riscv_vsetvlmax_e##EEW##EMUL(); } \
26-
staticinlinesize_tvsetvl(size_t a) {return __riscv_vsetvl_e##EEW##EMUL(a); } \
27-
staticinlinevec_tvmv_v_x(T a,size_t b) {return __riscv_vmv_v_x_##TYPE##EMUL(a, b); } \
28-
staticinlinevec_tvle(const T* a,size_t b) {return __riscv_vle##EEW##_v_##TYPE##EMUL(a, b); } \
29-
staticinline vuint8##M_EMUL##_tvle_mask(const uchar* a,size_t b) {return __riscv_vle8_v_u8##M_EMUL(a, b); } \
30-
staticinlinevec_tvmin_tu(vec_t a,vec_t b,vec_t c,size_t d) {return __riscv_vmin##IS_U##_tu(a, b, c, d); } \
31-
staticinlinevec_tvmax_tu(vec_t a,vec_t b,vec_t c,size_t d) {return __riscv_vmax##IS_U##_tu(a, b, c, d); } \
32-
staticinlinevec_tvmin_tumu(bool_t a,vec_t b,vec_t c,vec_t d,size_t e) {return __riscv_vmin##IS_U##_tumu(a, b, c, d, e); } \
33-
staticinlinevec_tvmax_tumu(bool_t a,vec_t b,vec_t c,vec_t d,size_t e) {return __riscv_vmax##IS_U##_tumu(a, b, c, d, e); } \
34-
staticinlinevec_tvredmin(vec_t a,vec_t b,size_t c) {return __riscv_vredmin##IS_U(a, b, c); } \
35-
staticinlinevec_tvredmax(vec_t a,vec_t b,size_t c) {return __riscv_vredmax##IS_U(a, b, c); } \
36-
};
37-
HAL_RVV_GENERATOR(uchar ,8 ,u8 , u, m1, m1 ,8 )
38-
HAL_RVV_GENERATOR(schar ,8 ,i8 , , m1, m1 ,8 )
39-
HAL_RVV_GENERATOR(ushort,16,u16, u, m1, mf2,16)
40-
HAL_RVV_GENERATOR(short ,16,i16, , m1, mf2,16)
41-
#undef HAL_RVV_GENERATOR
18+
#defineHAL_RVV_GENERATOR(T, EEW, TYPE, IS_U, EMUL, M_EMUL, B_LEN) \
19+
template<>structrvv<T> \
20+
{ \
21+
usingvec_t = v##IS_U##int##EEW##EMUL##_t; \
22+
usingbool_t = vbool##B_LEN##_t; \
23+
staticinlinesize_tvsetvlmax() {return __riscv_vsetvlmax_e##EEW##EMUL(); } \
24+
staticinlinesize_tvsetvl(size_t a) {return __riscv_vsetvl_e##EEW##EMUL(a); } \
25+
staticinlinevec_tvmv_v_x(T a,size_t b) {return __riscv_vmv_v_x_##TYPE##EMUL(a, b); } \
26+
staticinlinevec_tvle(const T* a,size_t b) {return __riscv_vle##EEW##_v_##TYPE##EMUL(a, b); } \
27+
staticinline vuint8##M_EMUL##_tvle_mask(const uchar* a,size_t b) {return __riscv_vle8_v_u8##M_EMUL(a, b); } \
28+
staticinlinevec_tvmin_tu(vec_t a,vec_t b,vec_t c,size_t d) {return __riscv_vmin##IS_U##_tu(a, b, c, d); } \
29+
staticinlinevec_tvmax_tu(vec_t a,vec_t b,vec_t c,size_t d) {return __riscv_vmax##IS_U##_tu(a, b, c, d); } \
30+
staticinlinevec_tvmin_tumu(bool_t a,vec_t b,vec_t c,vec_t d,size_t e) {return __riscv_vmin##IS_U##_tumu(a, b, c, d, e); } \
31+
staticinlinevec_tvmax_tumu(bool_t a,vec_t b,vec_t c,vec_t d,size_t e) {return __riscv_vmax##IS_U##_tumu(a, b, c, d, e); } \
32+
staticinlinevec_tvredmin(vec_t a,vec_t b,size_t c) {return __riscv_vredmin##IS_U(a, b, c); } \
33+
staticinlinevec_tvredmax(vec_t a,vec_t b,size_t c) {return __riscv_vredmax##IS_U(a, b, c); } \
34+
};
35+
HAL_RVV_GENERATOR(uchar ,8 ,u8 , u, m1, m1 ,8 )
36+
HAL_RVV_GENERATOR(schar ,8 ,i8 , , m1, m1 ,8 )
37+
HAL_RVV_GENERATOR(ushort,16,u16, u, m1, mf2,16)
38+
HAL_RVV_GENERATOR(short ,16,i16, , m1, mf2,16)
39+
#undef HAL_RVV_GENERATOR
4240

43-
#defineHAL_RVV_GENERATOR(T, NAME, EEW, TYPE, IS_F, F_OR_S, F_OR_X, EMUL, M_EMUL, P_EMUL, B_LEN) \
44-
template<>structrvv<T> \
45-
{ \
46-
usingvec_t = v##NAME##EEW##EMUL##_t; \
47-
usingbool_t = vbool##B_LEN##_t; \
48-
staticinlinesize_tvsetvlmax() {return __riscv_vsetvlmax_e##EEW##EMUL(); } \
49-
staticinlinesize_tvsetvl(size_t a) {return __riscv_vsetvl_e##EEW##EMUL(a); } \
50-
staticinlinevec_tvmv_v_x(T a,size_t b) {return __riscv_v##IS_F##mv_v_##F_OR_X##_##TYPE##EMUL(a, b); } \
51-
staticinline vuint32##P_EMUL##_tvid(size_t a) {return __riscv_vid_v_u32##P_EMUL(a); } \
52-
staticinline vuint32##P_EMUL##_tvundefined() {return __riscv_vundefined_u32##P_EMUL(); } \
53-
staticinlinevec_tvle(const T* a,size_t b) {return __riscv_vle##EEW##_v_##TYPE##EMUL(a, b); } \
54-
staticinline vuint8##M_EMUL##_tvle_mask(const uchar* a,size_t b) {return __riscv_vle8_v_u8##M_EMUL(a, b); } \
55-
staticinlinebool_tvmlt(vec_t a,vec_t b,size_t c) {return __riscv_vm##F_OR_S##lt(a, b, c); } \
56-
staticinlinebool_tvmgt(vec_t a,vec_t b,size_t c) {return __riscv_vm##F_OR_S##gt(a, b, c); } \
57-
staticinlinebool_tvmlt_mu(bool_t a,bool_t b,vec_t c,vec_t d,size_t e) {return __riscv_vm##F_OR_S##lt##_mu(a, b, c, d, e); } \
58-
staticinlinebool_tvmgt_mu(bool_t a,bool_t b,vec_t c,vec_t d,size_t e) {return __riscv_vm##F_OR_S##gt##_mu(a, b, c, d, e); } \
59-
staticinline Tvmv_x_s(vec_t a) {return __riscv_v##IS_F##mv_##F_OR_X(a); } \
60-
};
61-
HAL_RVV_GENERATOR(int ,int ,32,i32, , s, x, m4, m1 , m4,8 )
62-
HAL_RVV_GENERATOR(float ,float,32,f32, f, f, f, m4, m1 , m4,8 )
63-
HAL_RVV_GENERATOR(double,float,64,f64, f, f, f, m4, mf2, m2,16)
64-
#undef HAL_RVV_GENERATOR
65-
}
41+
#defineHAL_RVV_GENERATOR(T, NAME, EEW, TYPE, IS_F, F_OR_S, F_OR_X, EMUL, M_EMUL, P_EMUL, B_LEN) \
42+
template<>structrvv<T> \
43+
{ \
44+
usingvec_t = v##NAME##EEW##EMUL##_t; \
45+
usingbool_t = vbool##B_LEN##_t; \
46+
staticinlinesize_tvsetvlmax() {return __riscv_vsetvlmax_e##EEW##EMUL(); } \
47+
staticinlinesize_tvsetvl(size_t a) {return __riscv_vsetvl_e##EEW##EMUL(a); } \
48+
staticinlinevec_tvmv_v_x(T a,size_t b) {return __riscv_v##IS_F##mv_v_##F_OR_X##_##TYPE##EMUL(a, b); } \
49+
staticinline vuint32##P_EMUL##_tvid(size_t a) {return __riscv_vid_v_u32##P_EMUL(a); } \
50+
staticinline vuint32##P_EMUL##_tvundefined() {return __riscv_vundefined_u32##P_EMUL(); } \
51+
staticinlinevec_tvle(const T* a,size_t b) {return __riscv_vle##EEW##_v_##TYPE##EMUL(a, b); } \
52+
staticinline vuint8##M_EMUL##_tvle_mask(const uchar* a,size_t b) {return __riscv_vle8_v_u8##M_EMUL(a, b); } \
53+
staticinlinebool_tvmlt(vec_t a,vec_t b,size_t c) {return __riscv_vm##F_OR_S##lt(a, b, c); } \
54+
staticinlinebool_tvmgt(vec_t a,vec_t b,size_t c) {return __riscv_vm##F_OR_S##gt(a, b, c); } \
55+
staticinlinebool_tvmlt_mu(bool_t a,bool_t b,vec_t c,vec_t d,size_t e) {return __riscv_vm##F_OR_S##lt##_mu(a, b, c, d, e); } \
56+
staticinlinebool_tvmgt_mu(bool_t a,bool_t b,vec_t c,vec_t d,size_t e) {return __riscv_vm##F_OR_S##gt##_mu(a, b, c, d, e); } \
57+
staticinline Tvmv_x_s(vec_t a) {return __riscv_v##IS_F##mv_##F_OR_X(a); } \
58+
};
59+
HAL_RVV_GENERATOR(int ,int ,32,i32, , s, x, m4, m1 , m4,8 )
60+
HAL_RVV_GENERATOR(float ,float,32,f32, f, f, f, m4, m1 , m4,8 )
61+
HAL_RVV_GENERATOR(double,float,64,f64, f, f, f, m4, mf2, m2,16)
62+
#undef HAL_RVV_GENERATOR
6663

6764
template<typename T>
6865
inlineintminMaxIdxReadTwice(const uchar* src_data,size_t src_step,int width,int height,double* minVal,double* maxVal,
@@ -330,6 +327,6 @@ inline int minMaxIdx(const uchar* src_data, size_t src_step, int width, int heig
330327
return CV_HAL_ERROR_NOT_IMPLEMENTED;
331328
}
332329

333-
}}
330+
}}}
334331

335332
#endif

‎3rdparty/hal_rvv/hal_rvv_1p0/norm.hpp‎

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,10 @@
66

77
#include<riscv_vector.h>
88

9-
namespacecv {namespacecv_hal_rvv {
9+
namespacecv {namespacecv_hal_rvv {namespacenorm {
1010

1111
#undef cv_hal_norm
12-
#definecv_hal_norm cv::cv_hal_rvv::norm
12+
#definecv_hal_norm cv::cv_hal_rvv::norm::norm
1313

1414
inlineintnormInf_8UC1(const uchar* src,size_t src_step,const uchar* mask,size_t mask_step,int width,int height,double* result)
1515
{
@@ -512,6 +512,6 @@ inline int norm(const uchar* src, size_t src_step, const uchar* mask, size_t mas
512512
return CV_HAL_ERROR_NOT_IMPLEMENTED;
513513
}
514514

515-
}}
515+
}}}
516516

517517
#endif

‎3rdparty/hal_rvv/hal_rvv_1p0/norm_diff.hpp‎

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,10 @@
66

77
#include<riscv_vector.h>
88

9-
namespacecv {namespacecv_hal_rvv {
9+
namespacecv {namespacecv_hal_rvv {namespacenorm_diff {
1010

1111
#undef cv_hal_normDiff
12-
#definecv_hal_normDiff cv::cv_hal_rvv::normDiff
12+
#definecv_hal_normDiff cv::cv_hal_rvv::norm_diff::normDiff
1313

1414
inlineintnormDiffInf_8UC1(const uchar* src1,size_t src1_step,const uchar* src2,size_t src2_step,const uchar* mask,size_t mask_step,int width,int height,double* result)
1515
{
@@ -590,7 +590,7 @@ inline int normDiff(const uchar* src1, size_t src1_step, const uchar* src2, size
590590
if(ret == CV_HAL_ERROR_OK && (norm_type & NORM_RELATIVE))
591591
{
592592
double result_;
593-
ret =cv::cv_hal_rvv::norm(src2, src2_step, mask, mask_step, width, height, type, norm_type & ~NORM_RELATIVE, &result_);
593+
ret =cv::cv_hal_rvv::norm::norm(src2, src2_step, mask, mask_step, width, height, type, norm_type & ~NORM_RELATIVE, &result_);
594594
if(ret == CV_HAL_ERROR_OK)
595595
{
596596
*result /= result_ + DBL_EPSILON;
@@ -600,6 +600,6 @@ inline int normDiff(const uchar* src1, size_t src1_step, const uchar* src2, size
600600
return ret;
601601
}
602602

603-
}}
603+
}}}
604604

605605
#endif

‎modules/core/src/dxt.cpp‎

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -844,6 +844,17 @@ DFT(const OcvDftOptions & c, const Complex<T>* src, Complex<T>* dst)
844844
Complex<T> t;
845845
T scale = (T)c.scale;
846846

847+
if(typeid(T) ==typeid(float))
848+
{
849+
CALL_HAL(dft, cv_hal_dft,reinterpret_cast<const uchar*>(src),reinterpret_cast<uchar*>(dst), CV_32F,
850+
c.nf, c.factors, c.scale, c.itab, c.wave, c.tab_size, c.n, c.isInverse, c.noPermute);
851+
}
852+
if(typeid(T) ==typeid(double))
853+
{
854+
CALL_HAL(dft, cv_hal_dft,reinterpret_cast<const uchar*>(src),reinterpret_cast<uchar*>(dst), CV_64F,
855+
c.nf, c.factors, c.scale, c.itab, c.wave, c.tab_size, c.n, c.isInverse, c.noPermute);
856+
}
857+
847858
if( c.useIpp )
848859
{
849860
#ifdef USE_IPP_DFT

‎modules/core/src/hal_replacement.hpp‎

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -756,10 +756,28 @@ inline int hal_ni_dft1D(cvhalDFT *context, const uchar *src, uchar *dst) { retur
756756
*/
757757
inlineinthal_ni_dftFree1D(cvhalDFT *context) {return CV_HAL_ERROR_NOT_IMPLEMENTED; }
758758

759+
/**
760+
@param src source data
761+
@param dst destination data
762+
@param depth depth of source
763+
@param nf OcvDftOptions data
764+
@param factors OcvDftOptions data
765+
@param scale OcvDftOptions data
766+
@param itab OcvDftOptions data
767+
@param wave OcvDftOptions data
768+
@param tab_size OcvDftOptions data
769+
@param n OcvDftOptions data
770+
@param isInverse OcvDftOptions data
771+
@param noPermute OcvDftOptions data
772+
*/
773+
inlineinthal_ni_dft(const uchar* src, uchar* dst,int depth,int nf,int *factors,double scale,int* itab,void* wave,
774+
int tab_size,int n,bool isInverse,bool noPermute) {return CV_HAL_ERROR_NOT_IMPLEMENTED; }
775+
759776
//! @cond IGNORED
760777
#definecv_hal_dftInit1D hal_ni_dftInit1D
761778
#definecv_hal_dft1D hal_ni_dft1D
762779
#definecv_hal_dftFree1D hal_ni_dftFree1D
780+
#definecv_hal_dft hal_ni_dft
763781
//! @endcond
764782

765783
/**

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp