Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit be4331c

Browse files
committed
Remove the need for an extra buffer + conversion.
1 parent 7c2c88e, commit be4331c

File tree

1 file changed

+55
-93
lines changed

1 file changed

+55
-93
lines changed

‎modules/imgproc/src/resize.cpp‎

Lines changed: 55 additions & 93 deletions
Original file line number | Diff line number | Diff line change
@@ -3034,89 +3034,60 @@ static inline void vx_load_as(const ushort* ptr, v_float32& a)
30343034
staticinlinevoidvx_load_as(constshort* ptr, v_float32& a)
30353035
{ a =v_cvt_f32(v_reinterpret_as_s32(vx_load_expand(ptr))); }
30363036

3037-
template<typename VT>
3038-
VTvx_setall_local(double coeff);
3039-
template<>
3040-
v_float32vx_setall_local(double coeff) {
3037+
staticinlinevoidvx_load_as(constfloat* ptr, v_float32& a)
3038+
{ a =v_load(ptr); }
3039+
3040+
v_float32vx_setall_local(float coeff) {
30413041
returnv_setall_f32(coeff);
30423042
}
3043-
template<typename WT,typename VT>
3044-
voidv_inter_area_set_sum(int col_end,const WT *const buf,const VT &v_coeff,
3045-
WT *sum,int &x) {
3043+
#if CV_SIMD128_64F
3044+
staticinlinevoidvx_load_as(constdouble* ptr, v_float64& a)
3045+
{ a =v_load(ptr); }
3046+
3047+
v_float64vx_setall_local(double coeff) {
3048+
returnv_setall_f64(coeff);
3049+
}
3050+
#endif
3051+
template<typename T,typename WT,typename VT>
3052+
voidv_inter_area_set_or_update_sum(const T *const src,int n,bool do_set,
3053+
WT coeff, WT *sum) {
30463054
constexprint step = VT::nlanes;
3047-
for (x =0; x + step < col_end; x += step)
3055+
const VT v_coeff =vx_setall_local(coeff);
3056+
int x;
3057+
if (do_set)
30483058
{
3049-
const VT line =vx_load(buf + x);
3050-
v_store(sum + x, line * v_coeff);
3059+
for (x =0; x + step < n; x += step)
3060+
{
3061+
VT line;
3062+
vx_load_as(src + x, line);
3063+
v_store(sum + x, line * v_coeff);
3064+
}
3065+
for(; x < n; ++x) sum[x] = saturate_cast<WT>(src[x]) * coeff;
30513066
}
3052-
}
3053-
template<typename WT,typename VT>
3054-
voidv_inter_area_update_sum(int col_end,const WT *const buf,const VT &v_coeff,
3055-
WT *sum,int &x) {
3056-
constexprint step = VT::nlanes;
3057-
for (x =0; x + step < col_end; x += step)
3067+
else
30583068
{
3059-
const VT line =vx_load(buf + x);
3060-
const VT sum_x =vx_load(sum + x);
3061-
v_store(sum + x, sum_x + line * v_coeff);
3069+
for (x =0; x + step < n; x += step)
3070+
{
3071+
VT line;
3072+
vx_load_as(src + x, line);
3073+
const VT sum_x =vx_load(sum + x);
3074+
v_store(sum + x, sum_x + line * v_coeff);
3075+
}
3076+
for(; x < n; ++x) sum[x] += saturate_cast<WT>(src[x]) * coeff;
30623077
}
30633078
}
3064-
template<typename S>
3065-
voidv_inter_area_copy_or_not(const S* s,int n,float *d,floatconst**buf)
3066-
{
3067-
static_assert(!std::is_same<S,float>::value,"Do not specialize for float");
3068-
constexprint step = v_float32::nlanes;
3069-
int x =0;
3070-
for(; x + step < n; x += step)
3079+
#if !CV_SIMD128_64F
3080+
voidv_inter_area_set_or_update_sum(constdouble *const src,int n,bool do_set,
3081+
double coeff,double *sum) {
3082+
int x;
3083+
if (do_set)
30713084
{
3072-
v_float32 a;
3073-
vx_load_as(s + x, a);
3074-
v_store(d + x, a);
3085+
for(x =0; x < n; ++x) sum[x] = src[x] * coeff;
3086+
}
3087+
else
3088+
{
3089+
for(x =0; x < n; ++x) sum[x] += src[x] * coeff;
30753090
}
3076-
for(; x < n; ++x) d[x] = saturate_cast<float>(s[x]);
3077-
*buf = d;
3078-
}
3079-
voidv_inter_area_copy_or_not(constdouble* s,int n,double *d,constdouble **buf)
3080-
{
3081-
(void)n;
3082-
(void)d;
3083-
*buf = s;
3084-
}
3085-
voidv_inter_area_copy_or_not(constfloat* s,int n,float *d,constfloat **buf)
3086-
{
3087-
(void)n;
3088-
(void)d;
3089-
*buf = s;
3090-
}
3091-
3092-
#if CV_SIMD128_64F
3093-
template<>
3094-
v_float64vx_setall_local(double coeff) {
3095-
returnv_setall_f64(coeff);
3096-
}
3097-
#else
3098-
template<>
3099-
v_uint8vx_setall_local(double coeff) {
3100-
(void)coeff;
3101-
returnv_setall_u8(0);
3102-
}
3103-
template<>
3104-
voidv_inter_area_set_sum(int col_end,constdouble *const buf,const v_uint8 &v_coeff,
3105-
double *sum,int &x) {
3106-
(void)col_end;
3107-
(void)buf;
3108-
(void)v_coeff;
3109-
(void)sum;
3110-
x =0;
3111-
}
3112-
template<>
3113-
voidv_inter_area_update_sum(int col_end,constdouble *const buf,const v_uint8 &v_coeff,
3114-
double *sum,int &x) {
3115-
(void)col_end;
3116-
(void)buf;
3117-
(void)v_coeff;
3118-
(void)sum;
3119-
x =0;
31203091
}
31213092
#endif
31223093
}
@@ -3144,10 +3115,8 @@ class ResizeArea_Invoker : public ParallelLoopBody
31443115
Size dsize = dst->size();
31453116
constint cn = dst->channels();
31463117
dsize.width *= cn;
3147-
AutoBuffer<WT>_buffer(std::max(src->cols * cn, range.size() * cn));
31483118
const DecimateAlpha* xtab = xtab0;
31493119
constint xtab_size = xtab_size0;
3150-
WT *buf = _buffer.data();
31513120
constint j_start = tabofs[range.start], j_end = tabofs[range.end];
31523121

31533122
static_assert(
@@ -3183,7 +3152,6 @@ class ResizeArea_Invoker : public ParallelLoopBody
31833152
}
31843153
int prev_di = -1;
31853154
int di =0;
3186-
const WT* buf_local;
31873155
WT* sum =nullptr;
31883156
for (int j = row_start; j < row_end; ++j)
31893157
{
@@ -3194,37 +3162,31 @@ class ResizeArea_Invoker : public ParallelLoopBody
31943162
coeff = ytab[j].alpha;
31953163
di = ytab[j].di;
31963164
si = ytab[j].si;
3197-
const T* S = src->templateptr<T>(si);
3198-
// Convert the line to the proper float/double type.
3199-
v_inter_area_copy_or_not(S, col_end, buf, &buf_local);
32003165
}
32013166
else
32023167
{
32033168
coeff = xtab[j].alpha;
32043169
di = xtab[j].di / cn;
32053170
si = xtab[j].si / cn;
3206-
buf_local = tmp.templateptr<WT>(si);
32073171
}
3208-
const VT v_coeff = vx_setall_local<VT>(coeff);
32093172

3210-
if (di != prev_di)
3173+
if (di != prev_di) sum = tmp.templateptr<WT>(di - start_di);
3174+
3175+
if (iter ==0)
32113176
{
3212-
sum = tmp.templateptr<WT>(di - start_di);
3213-
int x;
3214-
v_inter_area_set_sum(col_end, buf_local, v_coeff, sum, x);
3215-
for (; x < col_end; ++x) sum[x] = buf_local[x] * coeff;
3216-
prev_di = di;
3177+
const T* s = src->templateptr<T>(si);
3178+
v_inter_area_set_or_update_sum<T, WT, VT>(s, col_end, di != prev_di,
3179+
coeff, sum);
32173180
}
32183181
else
32193182
{
3220-
int x;
3221-
v_inter_area_update_sum(col_end, buf_local, v_coeff, sum, x);
3222-
for (; x < col_end; ++x) sum[x] += buf_local[x] *coeff;
3183+
const WT* s = tmp.templateptr<WT>(si);
3184+
v_inter_area_set_or_update_sum<WT, WT, VT>(s, col_end, di != prev_di,
3185+
coeff, sum);
32233186
}
3187+
3188+
if (di != prev_di) prev_di = di;
32243189
}
3225-
// Deal with the last row.
3226-
WT* D = tmp.templateptr<WT>(di - start_di);
3227-
for (int x =0; x < col_end; ++x) D[x] = sum[x];
32283190

32293191
tmp =tmp(cv::Range(0, di - start_di +1),cv::Range(0, col_end / cn)).t();
32303192
}

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp