Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings
forked from opencv/opencv

Commit 17f0573

Browse files
WanliZhong and thewoz
authored and committed
Improve and refactor softmax layer (opencv#24466)
* improve and refactor softmax layer
* fix building error
* compatible region layer
* fix axisStep when disable SIMD
* fix dynamic array
* try to fix error
* use nlanes from VTraits
* move axisBias to srcOffset
* fix bug caused by axisBias
* remove macro
* replace #ifdef with #if for CV_SIMD
1 parent dea43a7 commit 17f0573

File tree

6 files changed

+251
-82
lines changed

6 files changed

+251
-82
lines changed

‎modules/dnn/perf/perf_layer.cpp‎

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -758,4 +758,55 @@ INSTANTIATE_TEST_CASE_P(/**/, Layer_FullyConnected, Combine(
758758
dnnBackendsAndTargets()
759759
));
760760

761+
typedef TestBaseWithParam<tuple<std::vector<int>,int, tuple<Backend, Target> > > Layer_Softmax;
762+
PERF_TEST_P_(Layer_Softmax, softmax_3d) {
763+
std::vector<int> shape = get<0>(GetParam());
764+
int axis = get<1>(GetParam());
765+
int backendId = get<0>(get<2>(GetParam()));
766+
int targetId = get<1>(get<2>(GetParam()));
767+
768+
Matdata(shape, CV_32FC1);
769+
Scalar mean =0.f;
770+
Scalar std =1.f;
771+
randn(data, mean, std);
772+
773+
Net net;
774+
LayerParams lp;
775+
lp.type ="Softmax";
776+
lp.name ="testLayer";
777+
lp.set("axis", axis);
778+
779+
net.addLayerToPrev(lp.name, lp.type, lp);
780+
// warmup
781+
{
782+
net.setInput(data);
783+
net.setPreferableBackend(backendId);
784+
net.setPreferableTarget(targetId);
785+
Mat out = net.forward();
786+
}
787+
788+
TEST_CYCLE() {
789+
Mat res = net.forward();
790+
}
791+
792+
SANITY_CHECK_NOTHING();
793+
}
794+
795+
INSTANTIATE_TEST_CASE_P(/**/, Layer_Softmax, Combine(
796+
Values(// input size
797+
std::vector<int>({16,50,50}),
798+
std::vector<int>({16,197,197}),
799+
std::vector<int>({16,1024,1024})
800+
),
801+
Values(0,1,2),// axis
802+
dnnBackendsAndTargets(/* withInferenceEngine=*/false,
803+
/* withHalide=*/false,
804+
/* withCpuOCV=*/true,
805+
/* withVkCom=*/false,
806+
/* withCUDA=*/false,
807+
/* withNgraph=*/false,
808+
/* withWebnn=*/false,
809+
/* withCann=*/false)// only test on CPU
810+
));
811+
761812
}// namespace
Lines changed: 157 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,157 @@
1+
// This file is part of OpenCV project.
2+
// It is subject to the license terms in the LICENSE file found in the top-level directory
3+
// of this distribution and at http://opencv.org/license.html.
4+
5+
// This file is modified from the ficus (https://github.com/vpisarev/ficus/blob/master/lib/NN/OpNN.fx).
6+
// Here is the original license:
7+
/*
8+
This file is a part of ficus language project.
9+
See ficus/LICENSE for the licensing terms
10+
*/
11+
12+
#include"../../precomp.hpp"
13+
#include"softmax.hpp"
14+
15+
namespacecv {namespacednn {
16+
17+
voidsoftmax(Mat &dst,const Mat &src,int axis,int axisBias,int axisStep){
18+
CV_Assert(src.type() == CV_32F);
19+
CV_Assert(src.isContinuous() && dst.isContinuous());
20+
CV_Assert(src.size == dst.size);
21+
axis =normalize_axis(axis, src.dims);
22+
23+
size_t outerSize = src.total(0, axis),
24+
innerSize = src.total(axis +1);
25+
26+
constfloat *srcPtr = src.ptr<float>();
27+
float *dstPtr = dst.ptr<float>();
28+
29+
size_t outerStep = src.total(axis);
30+
size_t cnStep = src.total(axis +1);
31+
32+
// multi-threads
33+
size_t totalTasks = outerSize * innerSize;
34+
double nstripes = (double) totalTasks /1024.0;
35+
// make the channel axis to be multiple of 8
36+
size_t channelAxis = (axisStep +7) & -8;
37+
38+
#if CV_SIMD
39+
constint nlanes = VTraits<v_float32>::vlanes();
40+
// the number of redundant dimension
41+
size_t redundantDim = nlanes - axisStep % nlanes;
42+
#endif
43+
44+
parallel_for_(Range(0, (int) totalTasks), [&](const Range &range) {
45+
AutoBuffer<float>axisBuf_(channelAxis);
46+
float *axisBuf = axisBuf_.data();
47+
48+
for (size_t i = range.start; i < range.end; i++) {
49+
size_t outerDim = i / innerSize;
50+
size_t innerDim = i % innerSize;
51+
size_t srcOffset = outerDim * outerStep + innerDim;
52+
// copy data from src to buf along axis, since the data may not be continuous
53+
for (size_t cnDim =0; cnDim < axisStep; cnDim++)
54+
axisBuf[cnDim] = srcPtr[srcOffset + (cnDim + axisBias) * cnStep];
55+
56+
float s =0.f;
57+
#if CV_SIMD
58+
// make the value of the redundant dimension to be -FLT_MAX
59+
if (redundantDim != nlanes) {
60+
for (size_t j = axisStep; j < axisStep + redundantDim; j++)
61+
axisBuf[j] = -FLT_MAX;
62+
}
63+
// calculate the max value along the axis
64+
v_float32 vmax =vx_load(axisBuf);
65+
for (size_t cnDim = nlanes; cnDim < axisStep; cnDim += nlanes) {
66+
v_float32 val =vx_load(axisBuf + cnDim);
67+
vmax =v_max(vmax, val);
68+
}
69+
float maxVal =v_reduce_max(vmax);
70+
71+
// calculate the exp value along the axis
72+
v_float32 vs =vx_setzero_f32();
73+
vmax =vx_setall_f32(maxVal);
74+
// initialize vexp constant
75+
v_float32 _vexp_lo =vx_setall_f32(-88.3762626647949f);
76+
v_float32 _vexp_hi =vx_setall_f32(88.3762626647949f);
77+
v_float32 _vexp_half =vx_setall_f32(0.5f);
78+
v_float32 _vexp_one =vx_setall_f32(1.f);
79+
v_float32 _vexp_LOG2EF =vx_setall_f32(1.44269504088896341f);
80+
v_float32 _vexp_C1 =vx_setall_f32(-0.693359375f);
81+
v_float32 _vexp_C2 =vx_setall_f32(2.12194440e-4f);
82+
v_float32 _vexp_p0 =vx_setall_f32(1.9875691500E-4f);
83+
v_float32 _vexp_p1 =vx_setall_f32(1.3981999507E-3f);
84+
v_float32 _vexp_p2 =vx_setall_f32(8.3334519073E-3f);
85+
v_float32 _vexp_p3 =vx_setall_f32(4.1665795894E-2f);
86+
v_float32 _vexp_p4 =vx_setall_f32(1.6666665459E-1f);
87+
v_float32 _vexp_p5 =vx_setall_f32(5.0000001201E-1f);
88+
// initialize temp vectors for vexp
89+
v_float32 val, _vexp_, _vexp_x, _vexp_y, _vexp_z;
90+
v_int32 _vexp_mm;
91+
92+
// calculate and sum all data along axis
93+
for (size_t cnDim =0; cnDim < axisStep; cnDim += nlanes) {
94+
val =vx_load(axisBuf + cnDim);
95+
val =v_sub(val, vmax);
96+
97+
// compute vexp of val
98+
_vexp_x =v_min(val, _vexp_hi);
99+
_vexp_x =v_max(_vexp_x, _vexp_lo);
100+
_vexp_ =v_fma(_vexp_x, _vexp_LOG2EF, _vexp_half);
101+
_vexp_mm =v_floor(_vexp_);
102+
_vexp_ =v_cvt_f32(_vexp_mm);
103+
_vexp_mm =v_add(_vexp_mm,vx_setall_s32(0x7f));
104+
_vexp_mm =v_shl(_vexp_mm,23);
105+
_vexp_x =v_fma(_vexp_, _vexp_C1, _vexp_x);
106+
_vexp_x =v_fma(_vexp_, _vexp_C2, _vexp_x);
107+
_vexp_z =v_mul(_vexp_x, _vexp_x);
108+
_vexp_y =v_fma(_vexp_x, _vexp_p0, _vexp_p1);
109+
_vexp_y =v_fma(_vexp_y, _vexp_x, _vexp_p2);
110+
_vexp_y =v_fma(_vexp_y, _vexp_x, _vexp_p3);
111+
_vexp_y =v_fma(_vexp_y, _vexp_x, _vexp_p4);
112+
_vexp_y =v_fma(_vexp_y, _vexp_x, _vexp_p5);
113+
_vexp_y =v_fma(_vexp_y, _vexp_z, _vexp_x);
114+
_vexp_y =v_add(_vexp_y, _vexp_one);
115+
val =v_mul(_vexp_y,v_reinterpret_as_f32(_vexp_mm));
116+
117+
vs =v_add(vs, val);
118+
v_store(axisBuf + cnDim, val);
119+
}
120+
121+
s =v_reduce_sum(vs);
122+
// subtract the value of the redundant dimension
123+
if (redundantDim != nlanes) {
124+
float* _val =newfloat[nlanes];
125+
v_store(_val, val);
126+
for (size_t j = nlanes - redundantDim; j < nlanes; j++)
127+
s -= _val[j];
128+
}
129+
#else
130+
float maxVal = axisBuf[0];
131+
for (size_t cnDim =1; cnDim < axisStep; cnDim++) {
132+
maxVal =std::max(maxVal, axisBuf[cnDim]);
133+
}
134+
for (size_t j =0; j < axisStep; j++) {
135+
axisBuf[j] =expf(axisBuf[j] - maxVal);
136+
s += axisBuf[j];
137+
}
138+
#endif
139+
s =1.f / s;
140+
141+
// copy back the result to src
142+
for (size_t cnDim =0; cnDim < axisStep; cnDim++)
143+
dstPtr[srcOffset + (cnDim + axisBias) * cnStep] = axisBuf[cnDim] * s;
144+
}
145+
}, nstripes);
146+
}
147+
148+
voidsoftmax(Mat &dst,const Mat &src,int axis) {
149+
softmax(dst, src, axis,0, src.size[axis]);
150+
}
151+
152+
voidlogSoftmax(Mat &dst,const Mat &src,int axis) {
153+
softmax(dst, src, axis);
154+
log(dst, dst);
155+
}
156+
157+
}}// cv::dnn
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
// This file is part of OpenCV project.
2+
// It is subject to the license terms in the LICENSE file found in the top-level directory
3+
// of this distribution and at http://opencv.org/license.html.
4+
5+
// This file is modified from the ficus (https://github.com/vpisarev/ficus/blob/master/lib/NN/OpNN.fx).
6+
// Here is the original license:
7+
/*
8+
This file is a part of ficus language project.
9+
See ficus/LICENSE for the licensing terms
10+
*/
11+
12+
#ifndef OPENCV_DNN_SOFTMAX_HPP
13+
#defineOPENCV_DNN_SOFTMAX_HPP
14+
15+
#include"opencv2/core/hal/intrin.hpp"
16+
#include<opencv2/dnn/shape_utils.hpp>
17+
18+
namespacecv {namespacednn {
19+
20+
voidsoftmax(Mat &dst,const Mat &src,int axis,int axisBias,int axisStep);
21+
22+
voidsoftmax(Mat &dst,const Mat &src,int axis);
23+
24+
voidlogSoftmax(Mat &dst,const Mat &src,int axis);
25+
26+
}}// cv::dnn
27+
28+
#endif// OPENCV_DNN_SOFTMAX_HPP

‎modules/dnn/src/layers/region_layer.cpp‎

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@
4545
#include<opencv2/dnn/shape_utils.hpp>
4646
#include<opencv2/dnn/all_layers.hpp>
4747
#include"../nms.inl.hpp"
48+
#include"cpu_kernels/softmax.hpp"
4849

4950
#ifdef HAVE_OPENCL
5051
#include"opencl_kernels_dnn.hpp"
@@ -280,10 +281,8 @@ class RegionLayerImpl CV_FINAL : public RegionLayer
280281
}
281282

282283
if (useSoftmax) {// Yolo v2
283-
for (int i =0; i < batch_size*rows*cols*anchors; ++i) {
284-
int index = cell_size*i;
285-
softmax_activate(srcData + index +5, classes,1, dstData + index +5);
286-
}
284+
Mat _inpBlob = inpBlob.reshape(0, outBlob.dims, outBlob.size);
285+
softmax(outBlob, _inpBlob, -1,5, classes);
287286
}
288287
elseif (useLogistic) {// Yolo v3
289288
for (int i =0; i < batch_size*rows*cols*anchors; ++i){

‎modules/dnn/src/layers/softmax_layer.cpp‎

Lines changed: 5 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@
5252
#include<algorithm>
5353
#include<stdlib.h>
5454
#include<opencv2/core/utils/logger.hpp>
55+
#include"cpu_kernels/softmax.hpp"
5556
using std::max;
5657

5758
#ifdef HAVE_OPENCL
@@ -225,89 +226,15 @@ class SoftMaxLayerImpl CV_FINAL : public SoftmaxLayer
225226
std::vector<Mat> inputs, outputs, internals;
226227
inputs_arr.getMatVector(inputs);
227228
outputs_arr.getMatVector(outputs);
228-
internals_arr.getMatVector(internals);
229229

230230
const Mat &src = inputs[0];
231231
Mat &dst = outputs[0];
232-
233232
int axis =normalize_axis(axisRaw, src.dims);
234-
size_t outerSize = src.total(0, axis), channels = src.size[axis],
235-
innerSize = src.total(axis +1);
236-
237-
CV_Assert(src.type() == CV_32F);
238-
CV_Assert(src.isContinuous() && dst.isContinuous());
239-
240-
constfloat *srcPtr = src.ptr<float>();
241-
float *dstPtr = dst.ptr<float>();
242-
float *bufPtr = internals[0].ptr<float>();
243-
244-
size_t outerStep = src.total(axis);
245-
size_t cnStep = src.total(axis +1);
246-
247-
//compute max along axis
248-
for (size_t outerDim =0; outerDim < outerSize; outerDim++)
249-
{
250-
size_t srcOffset = outerDim * outerStep;
251-
size_t bufOffset = outerDim * cnStep;
252-
253-
memcpy(bufPtr + bufOffset, srcPtr + srcOffset, innerSize *sizeof(float));
254-
255-
for (size_t cnDim =1; cnDim < channels; cnDim++)
256-
{
257-
for (size_t i =0; i < innerSize; i++)
258-
bufPtr[bufOffset + i] =std::max(bufPtr[bufOffset + i], srcPtr[srcOffset + cnDim * cnStep + i]);
259-
}
260-
}
261-
262-
//subtract max
263-
for (size_t outerDim =0; outerDim < outerSize; outerDim++)
264-
{
265-
size_t srcOffset = outerDim * outerStep;
266-
size_t bufOffset = outerDim * cnStep;
267-
268-
for (size_t cnDim =0; cnDim < channels; cnDim++)
269-
{
270-
constint offset = srcOffset + cnDim * cnStep;
271-
for (size_t i =0; i < innerSize; i++)
272-
dstPtr[offset + i] = srcPtr[offset + i] - bufPtr[bufOffset + i];
273-
}
274-
}
275-
276-
cv::exp(dst, dst);
277-
278-
for (size_t outerDim =0; outerDim < outerSize; outerDim++)
279-
{
280-
size_t srcOffset = outerDim * outerStep;
281-
size_t bufOffset = outerDim * cnStep;
282-
283-
//sum exp along axis
284-
for (size_t i =0; i < innerSize; i++)
285-
bufPtr[bufOffset + i] =0.f;
286233

287-
for (size_t cnDim =0; cnDim < channels; cnDim++)
288-
{
289-
constint offset = srcOffset + cnDim * cnStep;
290-
for (size_t i =0; i < innerSize; i++)
291-
bufPtr[bufOffset + i] += dstPtr[offset + i];
292-
}
293-
294-
//divide by computed sum
295-
for (size_t cnDim =0; cnDim < channels; cnDim++)
296-
{
297-
constint offset = srcOffset + cnDim * cnStep;
298-
for (size_t i =0; i < innerSize; i++)
299-
dstPtr[offset + i] /= bufPtr[bufOffset + i];
300-
}
301-
if (logSoftMax)
302-
{
303-
for (size_t cnDim =0; cnDim < channels; cnDim++)
304-
{
305-
constint offset = srcOffset + cnDim * cnStep;
306-
for (size_t i =0; i < innerSize; i++)
307-
dstPtr[offset + i] =log(dstPtr[offset + i]);
308-
}
309-
}
310-
}
234+
if(logSoftMax)
235+
logSoftmax(dst, src, axis);
236+
else
237+
softmax(dst, src, axis);
311238
}
312239

313240
#ifdef HAVE_CUDA

‎modules/dnn/src/onnx/onnx_importer.cpp‎

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2788,6 +2788,13 @@ void ONNXImporter::parseUpsample(LayerParams& layerParams, const opencv_onnx::No
27882788
voidONNXImporter::parseSoftMax(LayerParams& layerParams,const opencv_onnx::NodeProto& node_proto)
27892789
{
27902790
const std::string& layer_type = node_proto.op_type();
2791+
int axis;
2792+
if (layerParams.has("opset") && layerParams.get<int>("opset") >11) {
2793+
axis = layerParams.get<int>("axis", -1);
2794+
}else {
2795+
axis = layerParams.get<int>("axis",1);
2796+
}
2797+
layerParams.set<int>("axis", axis);
27912798
layerParams.type ="Softmax";
27922799
layerParams.set("log_softmax", layer_type =="LogSoftmax");
27932800
addLayer(layerParams, node_proto);

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2026 Movatter.jp