@@ -118,22 +118,20 @@ namespace cv
118118static void threshold_runner (const oclMat &src, oclMat &dst,double thresh,double maxVal,int thresholdType)
119119 {
120120bool ival = src.depth () < CV_32F;
121+ int cn = src.channels (), vecSize =4 , depth = src.depth ();
121122 std::vector<uchar> thresholdValue =scalarToVector (cv::Scalar::all (ival ?cvFloor (thresh) : thresh), dst.depth (),
122123 dst.oclchannels (), dst.channels ());
123124 std::vector<uchar> maxValue =scalarToVector (cv::Scalar::all (maxVal), dst.depth (), dst.oclchannels (), dst.channels ());
124125
125- size_t localThreads[3 ] = {16 ,16 ,1 };
126- size_t globalThreads[3 ] = { dst.cols , dst.rows ,1 };
127-
128126const char *const thresholdMap[] = {" THRESH_BINARY" ," THRESH_BINARY_INV" ," THRESH_TRUNC" ,
129127" THRESH_TOZERO" ," THRESH_TOZERO_INV" };
130128const char *const channelMap[] = {" " ," " ," 2" ," 4" ," 4" };
131129const char *const typeMap[] = {" uchar" ," char" ," ushort" ," short" ," int" ," float" ," double" };
132- std::string buildOptions =format (" -D T=%s%s -D %s" , typeMap[src.depth ()], channelMap[src.channels ()],
133- thresholdMap[thresholdType]);
130+ std::string buildOptions =format (" -D T=%s%s -D %s" , typeMap[depth], channelMap[cn], thresholdMap[thresholdType]);
134131
135- int src_step = src.step / src.elemSize (), src_offset = src.offset / src.elemSize ();
136- int dst_step = dst.step / dst.elemSize (), dst_offset = dst.offset / dst.elemSize ();
132+ int elemSize = src.elemSize ();
133+ int src_step = src.step / elemSize, src_offset = src.offset / elemSize;
134+ int dst_step = dst.step / elemSize, dst_offset = dst.offset / elemSize;
137135
138136 vector< pair<size_t ,const void *> > args;
139137 args.push_back (make_pair (sizeof (cl_mem), (void *)&src.data ));
@@ -142,11 +140,32 @@ namespace cv
142140 args.push_back (make_pair (sizeof (cl_mem), (void *)&dst.data ));
143141 args.push_back (make_pair (sizeof (cl_int), (void *)&dst_offset));
144142 args.push_back (make_pair (sizeof (cl_int), (void *)&dst_step));
145- args.push_back (make_pair (sizeof (cl_int), (void *)&dst.rows ));
146- args.push_back (make_pair (sizeof (cl_int), (void *)&dst.cols ));
147143 args.push_back (make_pair (thresholdValue.size (), (void *)&thresholdValue[0 ]));
148144 args.push_back (make_pair (maxValue.size (), (void *)&maxValue[0 ]));
149145
146+ int max_index = dst.cols , cols = dst.cols ;
147+ if (cn ==1 && vecSize >1 )
148+ {
149+ CV_Assert (((vecSize -1 ) & vecSize) ==0 && vecSize <=16 );
150+ cols =divUp (cols, vecSize);
151+ buildOptions +=format (" -D VECTORIZED -D VT=%s%d -D VLOADN=vload%d -D VECSIZE=%d -D VSTOREN=vstore%d" ,
152+ typeMap[depth], vecSize, vecSize, vecSize, vecSize);
153+
154+ int vecSizeBytes = vecSize * dst.elemSize1 ();
155+ if ((dst.offset % dst.step ) % vecSizeBytes ==0 && dst.step % vecSizeBytes ==0 )
156+ buildOptions +=" -D DST_ALIGNED" ;
157+ if ((src.offset % src.step ) % vecSizeBytes ==0 && src.step % vecSizeBytes ==0 )
158+ buildOptions +=" -D SRC_ALIGNED" ;
159+
160+ args.push_back (make_pair (sizeof (cl_int), (void *)&max_index));
161+ }
162+
163+ args.push_back (make_pair (sizeof (cl_int), (void *)&dst.rows ));
164+ args.push_back (make_pair (sizeof (cl_int), (void *)&cols));
165+
166+ size_t localThreads[3 ] = {16 ,16 ,1 };
167+ size_t globalThreads[3 ] = { cols, dst.rows ,1 };
168+
150169openCLExecuteKernel (src.clCxt , &imgproc_threshold," threshold" , globalThreads, localThreads, args,
151170 -1 , -1 , buildOptions.c_str ());
152171 }