Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings
forked from opencv/opencv

Commit 17f0573

Browse files
WanliZhong and thewoz
authored and committed
Improve and refactor softmax layer (opencv#24466)
* improve and refactor softmax layer
* fix building error
* compatible region layer
* fix axisStep when disable SIMD
* fix dynamic array
* try to fix error
* use nlanes from VTraits
* move axisBias to srcOffset
* fix bug caused by axisBias
* remove macro
* replace #ifdef with #if for CV_SIMD
1 parent dea43a7 commit 17f0573

File tree

6 files changed

+251
-82
lines changed

6 files changed

+251
-82
lines changed

‎modules/dnn/perf/perf_layer.cpp‎

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -758,4 +758,55 @@ INSTANTIATE_TEST_CASE_P(/**/, Layer_FullyConnected, Combine(
758758
dnnBackendsAndTargets()
759759
));
760760

761+
typedef TestBaseWithParam<tuple<std::vector<int>,int, tuple<Backend, Target> > > Layer_Softmax;
762+
PERF_TEST_P_(Layer_Softmax, softmax_3d) {
763+
std::vector<int> shape = get<0>(GetParam());
764+
int axis = get<1>(GetParam());
765+
int backendId = get<0>(get<2>(GetParam()));
766+
int targetId = get<1>(get<2>(GetParam()));
767+
768+
Matdata(shape, CV_32FC1);
769+
Scalar mean =0.f;
770+
Scalar std =1.f;
771+
randn(data, mean, std);
772+
773+
Net net;
774+
LayerParams lp;
775+
lp.type ="Softmax";
776+
lp.name ="testLayer";
777+
lp.set("axis", axis);
778+
779+
net.addLayerToPrev(lp.name, lp.type, lp);
780+
// warmup
781+
{
782+
net.setInput(data);
783+
net.setPreferableBackend(backendId);
784+
net.setPreferableTarget(targetId);
785+
Mat out = net.forward();
786+
}
787+
788+
TEST_CYCLE() {
789+
Mat res = net.forward();
790+
}
791+
792+
SANITY_CHECK_NOTHING();
793+
}
794+
795+
INSTANTIATE_TEST_CASE_P(/**/, Layer_Softmax, Combine(
796+
Values(// input size
797+
std::vector<int>({16,50,50}),
798+
std::vector<int>({16,197,197}),
799+
std::vector<int>({16,1024,1024})
800+
),
801+
Values(0,1,2),// axis
802+
dnnBackendsAndTargets(/* withInferenceEngine=*/false,
803+
/* withHalide=*/false,
804+
/* withCpuOCV=*/true,
805+
/* withVkCom=*/false,
806+
/* withCUDA=*/false,
807+
/* withNgraph=*/false,
808+
/* withWebnn=*/false,
809+
/* withCann=*/false)// only test on CPU
810+
));
811+
761812
}// namespace
Lines changed: 157 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,157 @@
1+
// This file is part of OpenCV project.
2+
// It is subject to the license terms in the LICENSE file found in the top-level directory
3+
// of this distribution and at http://opencv.org/license.html.
4+
5+
// This file is modified from the ficus (https://github.com/vpisarev/ficus/blob/master/lib/NN/OpNN.fx).
6+
// Here is the original license:
7+
/*
8+
This file is a part of ficus language project.
9+
See ficus/LICENSE for the licensing terms
10+
*/
11+
12+
#include"../../precomp.hpp"
13+
#include"softmax.hpp"
14+
15+
namespacecv {namespacednn {
16+
17+
voidsoftmax(Mat &dst,const Mat &src,int axis,int axisBias,int axisStep){
18+
CV_Assert(src.type() == CV_32F);
19+
CV_Assert(src.isContinuous() && dst.isContinuous());
20+
CV_Assert(src.size == dst.size);
21+
axis =normalize_axis(axis, src.dims);
22+
23+
size_t outerSize = src.total(0, axis),
24+
innerSize = src.total(axis +1);
25+
26+
constfloat *srcPtr = src.ptr<float>();
27+
float *dstPtr = dst.ptr<float>();
28+
29+
size_t outerStep = src.total(axis);
30+
size_t cnStep = src.total(axis +1);
31+
32+
// multi-threads
33+
size_t totalTasks = outerSize * innerSize;
34+
double nstripes = (double) totalTasks /1024.0;
35+
// make the channel axis to be multiple of 8
36+
size_t channelAxis = (axisStep +7) & -8;
37+
38+
#if CV_SIMD
39+
constint nlanes = VTraits<v_float32>::vlanes();
40+
// the number of redundant dimension
41+
size_t redundantDim = nlanes - axisStep % nlanes;
42+
#endif
43+
44+
parallel_for_(Range(0, (int) totalTasks), [&](const Range &range) {
45+
AutoBuffer<float>axisBuf_(channelAxis);
46+
float *axisBuf = axisBuf_.data();
47+
48+
for (size_t i = range.start; i < range.end; i++) {
49+
size_t outerDim = i / innerSize;
50+
size_t innerDim = i % innerSize;
51+
size_t srcOffset = outerDim * outerStep + innerDim;
52+
// copy data from src to buf along axis, since the data may not be continuous
53+
for (size_t cnDim =0; cnDim < axisStep; cnDim++)
54+
axisBuf[cnDim] = srcPtr[srcOffset + (cnDim + axisBias) * cnStep];
55+
56+
float s =0.f;
57+
#if CV_SIMD
58+
// make the value of the redundant dimension to be -FLT_MAX
59+
if (redundantDim != nlanes) {
60+
for (size_t j = axisStep; j < axisStep + redundantDim; j++)
61+
axisBuf[j] = -FLT_MAX;
62+
}
63+
// calculate the max value along the axis
64+
v_float32 vmax =vx_load(axisBuf);
65+
for (size_t cnDim = nlanes; cnDim < axisStep; cnDim += nlanes) {
66+
v_float32 val =vx_load(axisBuf + cnDim);
67+
vmax =v_max(vmax, val);
68+
}
69+
float maxVal =v_reduce_max(vmax);
70+
71+
// calculate the exp value along the axis
72+
v_float32 vs =vx_setzero_f32();
73+
vmax =vx_setall_f32(maxVal);
74+
// initialize vexp constant
75+
v_float32 _vexp_lo =vx_setall_f32(-88.3762626647949f);
76+
v_float32 _vexp_hi =vx_setall_f32(88.3762626647949f);
77+
v_float32 _vexp_half =vx_setall_f32(0.5f);
78+
v_float32 _vexp_one =vx_setall_f32(1.f);
79+
v_float32 _vexp_LOG2EF =vx_setall_f32(1.44269504088896341f);
80+
v_float32 _vexp_C1 =vx_setall_f32(-0.693359375f);
81+
v_float32 _vexp_C2 =vx_setall_f32(2.12194440e-4f);
82+
v_float32 _vexp_p0 =vx_setall_f32(1.9875691500E-4f);
83+
v_float32 _vexp_p1 =vx_setall_f32(1.3981999507E-3f);
84+
v_float32 _vexp_p2 =vx_setall_f32(8.3334519073E-3f);
85+
v_float32 _vexp_p3 =vx_setall_f32(4.1665795894E-2f);
86+
v_float32 _vexp_p4 =vx_setall_f32(1.6666665459E-1f);
87+
v_float32 _vexp_p5 =vx_setall_f32(5.0000001201E-1f);
88+
// initialize temp vectors for vexp
89+
v_float32 val, _vexp_, _vexp_x, _vexp_y, _vexp_z;
90+
v_int32 _vexp_mm;
91+
92+
// calculate and sum all data along axis
93+
for (size_t cnDim =0; cnDim < axisStep; cnDim += nlanes) {
94+
val =vx_load(axisBuf + cnDim);
95+
val =v_sub(val, vmax);
96+
97+
// compute vexp of val
98+
_vexp_x =v_min(val, _vexp_hi);
99+
_vexp_x =v_max(_vexp_x, _vexp_lo);
100+
_vexp_ =v_fma(_vexp_x, _vexp_LOG2EF, _vexp_half);
101+
_vexp_mm =v_floor(_vexp_);
102+
_vexp_ =v_cvt_f32(_vexp_mm);
103+
_vexp_mm =v_add(_vexp_mm,vx_setall_s32(0x7f));
104+
_vexp_mm =v_shl(_vexp_mm,23);
105+
_vexp_x =v_fma(_vexp_, _vexp_C1, _vexp_x);
106+
_vexp_x =v_fma(_vexp_, _vexp_C2, _vexp_x);
107+
_vexp_z =v_mul(_vexp_x, _vexp_x);
108+
_vexp_y =v_fma(_vexp_x, _vexp_p0, _vexp_p1);
109+
_vexp_y =v_fma(_vexp_y, _vexp_x, _vexp_p2);
110+
_vexp_y =v_fma(_vexp_y, _vexp_x, _vexp_p3);
111+
_vexp_y =v_fma(_vexp_y, _vexp_x, _vexp_p4);
112+
_vexp_y =v_fma(_vexp_y, _vexp_x, _vexp_p5);
113+
_vexp_y =v_fma(_vexp_y, _vexp_z, _vexp_x);
114+
_vexp_y =v_add(_vexp_y, _vexp_one);
115+
val =v_mul(_vexp_y,v_reinterpret_as_f32(_vexp_mm));
116+
117+
vs =v_add(vs, val);
118+
v_store(axisBuf + cnDim, val);
119+
}
120+
121+
s =v_reduce_sum(vs);
122+
// subtract the value of the redundant dimension
123+
if (redundantDim != nlanes) {
124+
float* _val =newfloat[nlanes];
125+
v_store(_val, val);
126+
for (size_t j = nlanes - redundantDim; j < nlanes; j++)
127+
s -= _val[j];
128+
}
129+
#else
130+
float maxVal = axisBuf[0];
131+
for (size_t cnDim =1; cnDim < axisStep; cnDim++) {
132+
maxVal =std::max(maxVal, axisBuf[cnDim]);
133+
}
134+
for (size_t j =0; j < axisStep; j++) {
135+
axisBuf[j] =expf(axisBuf[j] - maxVal);
136+
s += axisBuf[j];
137+
}
138+
#endif
139+
s =1.f / s;
140+
141+
// copy back the result to src
142+
for (size_t cnDim =0; cnDim < axisStep; cnDim++)
143+
dstPtr[srcOffset + (cnDim + axisBias) * cnStep] = axisBuf[cnDim] * s;
144+
}
145+
}, nstripes);
146+
}
147+
148+
voidsoftmax(Mat &dst,const Mat &src,int axis) {
149+
softmax(dst, src, axis,0, src.size[axis]);
150+
}
151+
152+
voidlogSoftmax(Mat &dst,const Mat &src,int axis) {
153+
softmax(dst, src, axis);
154+
log(dst, dst);
155+
}
156+
157+
}}// cv::dnn
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
// This file is part of OpenCV project.
2+
// It is subject to the license terms in the LICENSE file found in the top-level directory
3+
// of this distribution and at http://opencv.org/license.html.
4+
5+
// This file is modified from the ficus (https://github.com/vpisarev/ficus/blob/master/lib/NN/OpNN.fx).
6+
// Here is the original license:
7+
/*
8+
This file is a part of ficus language project.
9+
See ficus/LICENSE for the licensing terms
10+
*/
11+
12+
#ifndef OPENCV_DNN_SOFTMAX_HPP
13+
#defineOPENCV_DNN_SOFTMAX_HPP
14+
15+
#include"opencv2/core/hal/intrin.hpp"
16+
#include<opencv2/dnn/shape_utils.hpp>
17+
18+
namespacecv {namespacednn {
19+
20+
voidsoftmax(Mat &dst,const Mat &src,int axis,int axisBias,int axisStep);
21+
22+
voidsoftmax(Mat &dst,const Mat &src,int axis);
23+
24+
voidlogSoftmax(Mat &dst,const Mat &src,int axis);
25+
26+
}}// cv::dnn
27+
28+
#endif// OPENCV_DNN_SOFTMAX_HPP

‎modules/dnn/src/layers/region_layer.cpp‎

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@
4545
#include<opencv2/dnn/shape_utils.hpp>
4646
#include<opencv2/dnn/all_layers.hpp>
4747
#include"../nms.inl.hpp"
48+
#include"cpu_kernels/softmax.hpp"
4849

4950
#ifdef HAVE_OPENCL
5051
#include"opencl_kernels_dnn.hpp"
@@ -280,10 +281,8 @@ class RegionLayerImpl CV_FINAL : public RegionLayer
280281
}
281282

282283
if (useSoftmax) {// Yolo v2
283-
for (int i =0; i < batch_size*rows*cols*anchors; ++i) {
284-
int index = cell_size*i;
285-
softmax_activate(srcData + index +5, classes,1, dstData + index +5);
286-
}
284+
Mat _inpBlob = inpBlob.reshape(0, outBlob.dims, outBlob.size);
285+
softmax(outBlob, _inpBlob, -1,5, classes);
287286
}
288287
elseif (useLogistic) {// Yolo v3
289288
for (int i =0; i < batch_size*rows*cols*anchors; ++i){

‎modules/dnn/src/layers/softmax_layer.cpp‎

Lines changed: 5 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@
5252
#include<algorithm>
5353
#include<stdlib.h>
5454
#include<opencv2/core/utils/logger.hpp>
55+
#include"cpu_kernels/softmax.hpp"
5556
using std::max;
5657

5758
#ifdef HAVE_OPENCL
@@ -225,89 +226,15 @@ class SoftMaxLayerImpl CV_FINAL : public SoftmaxLayer
225226
std::vector<Mat> inputs, outputs, internals;
226227
inputs_arr.getMatVector(inputs);
227228
outputs_arr.getMatVector(outputs);
228-
internals_arr.getMatVector(internals);
229229

230230
const Mat &src = inputs[0];
231231
Mat &dst = outputs[0];
232-
233232
int axis =normalize_axis(axisRaw, src.dims);
234-
size_t outerSize = src.total(0, axis), channels = src.size[axis],
235-
innerSize = src.total(axis +1);
236-
237-
CV_Assert(src.type() == CV_32F);
238-
CV_Assert(src.isContinuous() && dst.isContinuous());
239-
240-
constfloat *srcPtr = src.ptr<float>();
241-
float *dstPtr = dst.ptr<float>();
242-
float *bufPtr = internals[0].ptr<float>();
243-
244-
size_t outerStep = src.total(axis);
245-
size_t cnStep = src.total(axis +1);
246-
247-
//compute max along axis
248-
for (size_t outerDim =0; outerDim < outerSize; outerDim++)
249-
{
250-
size_t srcOffset = outerDim * outerStep;
251-
size_t bufOffset = outerDim * cnStep;
252-
253-
memcpy(bufPtr + bufOffset, srcPtr + srcOffset, innerSize *sizeof(float));
254-
255-
for (size_t cnDim =1; cnDim < channels; cnDim++)
256-
{
257-
for (size_t i =0; i < innerSize; i++)
258-
bufPtr[bufOffset + i] =std::max(bufPtr[bufOffset + i], srcPtr[srcOffset + cnDim * cnStep + i]);
259-
}
260-
}
261-
262-
//subtract max
263-
for (size_t outerDim =0; outerDim < outerSize; outerDim++)
264-
{
265-
size_t srcOffset = outerDim * outerStep;
266-
size_t bufOffset = outerDim * cnStep;
267-
268-
for (size_t cnDim =0; cnDim < channels; cnDim++)
269-
{
270-
constint offset = srcOffset + cnDim * cnStep;
271-
for (size_t i =0; i < innerSize; i++)
272-
dstPtr[offset + i] = srcPtr[offset + i] - bufPtr[bufOffset + i];
273-
}
274-
}
275-
276-
cv::exp(dst, dst);
277-
278-
for (size_t outerDim =0; outerDim < outerSize; outerDim++)
279-
{
280-
size_t srcOffset = outerDim * outerStep;
281-
size_t bufOffset = outerDim * cnStep;
282-
283-
//sum exp along axis
284-
for (size_t i =0; i < innerSize; i++)
285-
bufPtr[bufOffset + i] =0.f;
286233

287-
for (size_t cnDim =0; cnDim < channels; cnDim++)
288-
{
289-
constint offset = srcOffset + cnDim * cnStep;
290-
for (size_t i =0; i < innerSize; i++)
291-
bufPtr[bufOffset + i] += dstPtr[offset + i];
292-
}
293-
294-
//divide by computed sum
295-
for (size_t cnDim =0; cnDim < channels; cnDim++)
296-
{
297-
constint offset = srcOffset + cnDim * cnStep;
298-
for (size_t i =0; i < innerSize; i++)
299-
dstPtr[offset + i] /= bufPtr[bufOffset + i];
300-
}
301-
if (logSoftMax)
302-
{
303-
for (size_t cnDim =0; cnDim < channels; cnDim++)
304-
{
305-
constint offset = srcOffset + cnDim * cnStep;
306-
for (size_t i =0; i < innerSize; i++)
307-
dstPtr[offset + i] =log(dstPtr[offset + i]);
308-
}
309-
}
310-
}
234+
if(logSoftMax)
235+
logSoftmax(dst, src, axis);
236+
else
237+
softmax(dst, src, axis);
311238
}
312239

313240
#ifdef HAVE_CUDA

‎modules/dnn/src/onnx/onnx_importer.cpp‎

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2788,6 +2788,13 @@ void ONNXImporter::parseUpsample(LayerParams& layerParams, const opencv_onnx::No
27882788
voidONNXImporter::parseSoftMax(LayerParams& layerParams,const opencv_onnx::NodeProto& node_proto)
27892789
{
27902790
const std::string& layer_type = node_proto.op_type();
2791+
int axis;
2792+
if (layerParams.has("opset") && layerParams.get<int>("opset") >11) {
2793+
axis = layerParams.get<int>("axis", -1);
2794+
}else {
2795+
axis = layerParams.get<int>("axis",1);
2796+
}
2797+
layerParams.set<int>("axis", axis);
27912798
layerParams.type ="Softmax";
27922799
layerParams.set("log_softmax", layer_type =="LogSoftmax");
27932800
addLayer(layerParams, node_proto);

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2026 Movatter.jp