Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit bf58049

Browse files
Andrey Pavlenko, OpenCV Buildbot
Andrey Pavlenko
authored and
OpenCV Buildbot
committed
Merge pull request #1724 from ilya-lavrenov:ocl_thresh
2 parents e80f5be + 1d5f5d2, commit bf58049

File tree

3 files changed

+91
-14
lines changed

3 files changed

+91
-14
lines changed

‎modules/ocl/perf/perf_imgproc.cpp‎

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -366,21 +366,23 @@ PERF_TEST_P(resizeFixture, resize,
366366

367367
///////////// threshold////////////////////////
368368

369-
CV_ENUM(ThreshType, THRESH_BINARY,THRESH_BINARY_INV, THRESH_TRUNC, THRESH_TOZERO,THRESH_TOZERO_INV)
369+
CV_ENUM(ThreshType, THRESH_BINARY, THRESH_TOZERO_INV)
370370

371-
typedef tuple<Size, ThreshType> ThreshParams;
371+
typedef tuple<Size,MatType,ThreshType> ThreshParams;
372372
typedef TestBaseWithParam<ThreshParams> ThreshFixture;
373373

374374
PERF_TEST_P(ThreshFixture, threshold,
375375
::testing::Combine(OCL_TYPICAL_MAT_SIZES,
376+
OCL_PERF_ENUM(CV_8UC1, CV_8UC4, CV_16SC1, CV_16SC4, CV_32FC1),
376377
ThreshType::all()))
377378
{
378379
const ThreshParams params =GetParam();
379380
const Size srcSize = get<0>(params);
380-
constint threshType = get<1>(params);
381+
constint srcType = get<1>(params);
382+
constint threshType = get<2>(params);
381383
constdouble maxValue =220.0, threshold =50;
382384

383-
Matsrc(srcSize,CV_8U),dst(srcSize,CV_8U);
385+
Matsrc(srcSize,srcType),dst(srcSize,srcType);
384386
randu(src,0,100);
385387
declare.in(src).out(dst);
386388

‎modules/ocl/src/imgproc.cpp‎

Lines changed: 28 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -118,22 +118,20 @@ namespace cv
118118
staticvoidthreshold_runner(const oclMat &src, oclMat &dst,double thresh,double maxVal,int thresholdType)
119119
{
120120
bool ival = src.depth() < CV_32F;
121+
int cn = src.channels(), vecSize =4, depth = src.depth();
121122
std::vector<uchar> thresholdValue =scalarToVector(cv::Scalar::all(ival ?cvFloor(thresh) : thresh), dst.depth(),
122123
dst.oclchannels(), dst.channels());
123124
std::vector<uchar> maxValue =scalarToVector(cv::Scalar::all(maxVal), dst.depth(), dst.oclchannels(), dst.channels());
124125

125-
size_t localThreads[3] = {16,16,1 };
126-
size_t globalThreads[3] = { dst.cols, dst.rows,1 };
127-
128126
constchar *const thresholdMap[] = {"THRESH_BINARY","THRESH_BINARY_INV","THRESH_TRUNC",
129127
"THRESH_TOZERO","THRESH_TOZERO_INV" };
130128
constchar *const channelMap[] = {"","","2","4","4" };
131129
constchar *const typeMap[] = {"uchar","char","ushort","short","int","float","double" };
132-
std::string buildOptions =format("-D T=%s%s -D %s", typeMap[src.depth()], channelMap[src.channels()],
133-
thresholdMap[thresholdType]);
130+
std::string buildOptions =format("-D T=%s%s -D %s", typeMap[depth], channelMap[cn], thresholdMap[thresholdType]);
134131

135-
int src_step = src.step / src.elemSize(), src_offset = src.offset / src.elemSize();
136-
int dst_step = dst.step / dst.elemSize(), dst_offset = dst.offset / dst.elemSize();
132+
int elemSize = src.elemSize();
133+
int src_step = src.step / elemSize, src_offset = src.offset / elemSize;
134+
int dst_step = dst.step / elemSize, dst_offset = dst.offset / elemSize;
137135

138136
vector< pair<size_t,constvoid *> > args;
139137
args.push_back(make_pair(sizeof(cl_mem), (void *)&src.data));
@@ -142,11 +140,32 @@ namespace cv
142140
args.push_back(make_pair(sizeof(cl_mem), (void *)&dst.data));
143141
args.push_back(make_pair(sizeof(cl_int), (void *)&dst_offset));
144142
args.push_back(make_pair(sizeof(cl_int), (void *)&dst_step));
145-
args.push_back(make_pair(sizeof(cl_int), (void *)&dst.rows));
146-
args.push_back(make_pair(sizeof(cl_int), (void *)&dst.cols));
147143
args.push_back(make_pair(thresholdValue.size(), (void *)&thresholdValue[0]));
148144
args.push_back(make_pair(maxValue.size(), (void *)&maxValue[0]));
149145

146+
int max_index = dst.cols, cols = dst.cols;
147+
if (cn ==1 && vecSize >1)
148+
{
149+
CV_Assert(((vecSize -1) & vecSize) ==0 && vecSize <=16);
150+
cols =divUp(cols, vecSize);
151+
buildOptions +=format(" -D VECTORIZED -D VT=%s%d -D VLOADN=vload%d -D VECSIZE=%d -D VSTOREN=vstore%d",
152+
typeMap[depth], vecSize, vecSize, vecSize, vecSize);
153+
154+
int vecSizeBytes = vecSize * dst.elemSize1();
155+
if ((dst.offset % dst.step) % vecSizeBytes ==0 && dst.step % vecSizeBytes ==0)
156+
buildOptions +=" -D DST_ALIGNED";
157+
if ((src.offset % src.step) % vecSizeBytes ==0 && src.step % vecSizeBytes ==0)
158+
buildOptions +=" -D SRC_ALIGNED";
159+
160+
args.push_back(make_pair(sizeof(cl_int), (void *)&max_index));
161+
}
162+
163+
args.push_back(make_pair(sizeof(cl_int), (void *)&dst.rows));
164+
args.push_back(make_pair(sizeof(cl_int), (void *)&cols));
165+
166+
size_t localThreads[3] = {16,16,1 };
167+
size_t globalThreads[3] = { cols, dst.rows,1 };
168+
150169
openCLExecuteKernel(src.clCxt, &imgproc_threshold,"threshold", globalThreads, localThreads, args,
151170
-1, -1, buildOptions.c_str());
152171
}

‎modules/ocl/src/opencl/imgproc_threshold.cl‎

Lines changed: 57 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -51,9 +51,63 @@
5151
#endif
5252
#endif
5353

54+
#ifdefVECTORIZED
55+
56+
__kernelvoidthreshold(__globalconstT* restrictsrc,intsrc_offset,intsrc_step,
57+
__globalT*dst,intdst_offset,intdst_step,
58+
Tthresh,Tmax_val,intmax_index,introws,intcols)
59+
{
60+
intgx=get_global_id(0);
61+
intgy=get_global_id(1);
62+
63+
if (gx<cols&&gy<rows)
64+
{
65+
gx *=VECSIZE;
66+
intsrc_index=mad24(gy,src_step,src_offset+gx);
67+
intdst_index=mad24(gy,dst_step,dst_offset+gx);
68+
69+
#ifdefSRC_ALIGNED
70+
VTsdata=*((__globalVT*)(src+src_index));
71+
#else
72+
VTsdata=VLOADN(0,src+src_index);
73+
#endif
74+
VTvthresh= (VT)(thresh),zero= (VT)(0);
75+
76+
#ifdefTHRESH_BINARY
77+
VTvecValue=sdata>vthresh ?max_val :zero;
78+
#elif definedTHRESH_BINARY_INV
79+
VTvecValue=sdata>vthresh ?zero :max_val;
80+
#elif definedTHRESH_TRUNC
81+
VTvecValue=sdata>vthresh ?thresh :sdata;
82+
#elif definedTHRESH_TOZERO
83+
VTvecValue=sdata>vthresh ?sdata :zero;
84+
#elif definedTHRESH_TOZERO_INV
85+
VTvecValue=sdata>vthresh ?zero :sdata;
86+
#endif
87+
88+
if (gx+VECSIZE <=max_index)
89+
#ifdefDST_ALIGNED
90+
*(__globalVT*)(dst+dst_index)=vecValue;
91+
#else
92+
VSTOREN(vecValue,0,dst+dst_index);
93+
#endif
94+
else
95+
{
96+
Tarray[VECSIZE];
97+
VSTOREN(vecValue,0,array);
98+
#pragma unroll
99+
for (inti=0;i<VECSIZE;++i)
100+
if (gx+i<max_index)
101+
dst[dst_index+i]=array[i];
102+
}
103+
}
104+
}
105+
106+
#else
107+
54108
__kernelvoidthreshold(__globalconstT* restrictsrc,intsrc_offset,intsrc_step,
55109
__globalT*dst,intdst_offset,intdst_step,
56-
introws,intcols,Tthresh,Tmax_val)
110+
Tthresh,Tmax_val,introws,intcols)
57111
{
58112
intgx=get_global_id(0);
59113
intgy=get_global_id(1);
@@ -78,3 +132,5 @@ __kernel void threshold(__global const T * restrict src, int src_offset, int src
78132
#endif
79133
}
80134
}
135+
136+
#endif

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp