Commit ee0822d

Merge pull request #24378 from fengyuentau:instance_norm

dnn onnx: add instance norm layer #24378

Resolves #24377
Relates #24092 (comment)

| Perf | multi-thread | single-thread |
| - | - | - |
| x: [2, 64, 180, 240] | 3.95ms | 11.12ms |

Todo:

- [x] speed up by multi-threading
- [x] add perf
- [x] add backend: OpenVINO
- [x] add backend: CUDA
- [x] add backend: OpenCL (no fp16)
- [ ] add backend: CANN (will be done via #24462)

### Pull Request Readiness Checklist

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [x] I agree to contribute to the project under the Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on code under the GPL or another license that is incompatible with OpenCV.
- [x] The PR is proposed to the proper branch.
- [x] There is a reference to the original bug report and related work.
- [x] There are accuracy tests, performance tests and test data in the opencv_extra repository, if applicable. The patch to opencv_extra has the same branch name.
- [x] The feature is well documented and sample code can be built with the project CMake.

```
force_builders=Linux OpenCL,Win64 OpenCL,Custom
buildworker:Custom=linux-4
build_image:Custom=ubuntu:18.04
modules_filter:Custom=none
disable_ipp:Custom=ON
```

1 parent 832f738 · commit ee0822d
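For orientation, instance normalization standardizes each (n, c) spatial plane of an NCHW tensor independently, then applies a per-channel affine transform. Below is a minimal standalone C++ sketch of that math — a reference illustration mirroring the CPU kernel changed in this PR, not code from the patch; `instanceNormRef` and its signature are illustrative only:

```cpp
#include <cmath>
#include <cstddef>
#include <vector>

// Reference instance norm on an NCHW float tensor:
// y[n,c,:,:] = scale[c] * (x[n,c,:,:] - mean) / sqrt(var + epsilon) + bias[c]
void instanceNormRef(const std::vector<float>& x, std::vector<float>& y,
                     const std::vector<float>& scale, const std::vector<float>& bias,
                     size_t N, size_t C, size_t H, size_t W, float epsilon) {
    const size_t plane = H * W;
    y.resize(x.size());
    for (size_t n = 0; n < N; n++) {
        for (size_t c = 0; c < C; c++) {
            const float* xp = x.data() + (n * C + c) * plane;
            float* yp = y.data() + (n * C + c) * plane;
            // mean and variance over the spatial plane of this (n, c) pair
            float mean = 0.f, sqmean = 0.f;
            for (size_t i = 0; i < plane; i++) { mean += xp[i]; sqmean += xp[i] * xp[i]; }
            mean /= plane;
            const float var = sqmean / plane - mean * mean;
            // fold the inverse stdev into the per-channel scale, as the CPU kernel does
            const float s = scale[c] / std::sqrt(var + epsilon);
            for (size_t i = 0; i < plane; i++)
                yp[i] = s * (xp[i] - mean) + bias[c];
        }
    }
}
```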

File tree

10 files changed: +454 −43 lines changed


modules/dnn/include/opencv2/dnn/all_layers.hpp

Lines changed: 7 additions & 0 deletions

```diff
@@ -1166,6 +1166,13 @@ CV__DNN_INLINE_NS_BEGIN
     static Ptr<ExpandLayer> create(const LayerParams &params);
 };
 
+class CV_EXPORTS InstanceNormLayer : public Layer {
+public:
+    float epsilon;
+
+    static Ptr<InstanceNormLayer> create(const LayerParams &params);
+};
+
 //! @}
 //! @}
 CV__DNN_INLINE_NS_END
```
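The header only exposes `epsilon` and a factory. As a hedged usage sketch, the layer could be constructed directly through `LayerParams`; treating `"epsilon"` as the parameter key mirrors the ONNX attribute name but is an assumption, and `makeInstanceNorm` is a hypothetical helper:

```cpp
#include <opencv2/dnn.hpp>
#include <opencv2/dnn/all_layers.hpp>

// Hypothetical helper: builds the layer directly; "epsilon" as a LayerParams
// key is an assumption mirroring the ONNX InstanceNormalization attribute.
cv::Ptr<cv::dnn::InstanceNormLayer> makeInstanceNorm()
{
    cv::dnn::LayerParams lp;
    lp.type = "InstanceNormalization";
    lp.name = "instance_norm1";
    lp.set("epsilon", 1e-5);  // assumed parameter name
    return cv::dnn::InstanceNormLayer::create(lp);
}
```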

modules/dnn/perf/perf_layer.cpp

Lines changed: 57 additions & 0 deletions

```diff
@@ -683,6 +683,62 @@ PERF_TEST_P_(Layer_GatherElements, GatherElements)
     test_layer({2700, 1, 2914}, {2700, 1, 81}, 2);
 }
 
+struct Layer_InstanceNorm : public TestBaseWithParam<tuple<Backend, Target> >
+{
+    void test_layer(const std::vector<int>& x_shape)
+    {
+        int backendId = get<0>(GetParam());
+        int targetId = get<1>(GetParam());
+
+        Mat x(x_shape, CV_32FC1);
+        Mat scale(x_shape[1], 1, CV_32FC1);
+        Mat b(x_shape[1], 1, CV_32FC1);
+
+        randu(x, 0.f, 1.f);
+        randu(scale, 0.f, 1.f);
+        randu(b, 0.f, 1.f);
+
+        Net net;
+        LayerParams lp;
+        lp.type = "InstanceNormalization";
+        lp.name = "testLayer";
+        int id = net.addLayerToPrev(lp.name, lp.type, lp);
+        net.connect(0, 0, id, 0);
+        net.connect(0, 1, id, 1);
+        net.connect(0, 2, id, 2);
+
+        // warmup
+        {
+            std::vector<String> inpNames{"x", "scale", "b"};
+            net.setInputsNames(inpNames);
+            net.setInput(x, inpNames[0]);
+            net.setInput(scale, inpNames[1]);
+            net.setInput(b, inpNames[2]);
+
+            net.setPreferableBackend(backendId);
+            net.setPreferableTarget(targetId);
+            Mat out = net.forward();
+        }
+
+        TEST_CYCLE()
+        {
+            Mat res = net.forward();
+        }
+
+        SANITY_CHECK_NOTHING();
+    }
+
+    int N = 2;
+    int C = 64;
+    int H = 180;
+    int W = 240;
+};
+
+PERF_TEST_P_(Layer_InstanceNorm, InstanceNorm)
+{
+    test_layer({N, C, H, W});
+}
+
 INSTANTIATE_TEST_CASE_P(/**/, Layer_Slice, dnnBackendsAndTargets(false, false));
 INSTANTIATE_TEST_CASE_P(/**/, Layer_NaryEltwise, testing::Values(std::make_tuple(DNN_BACKEND_OPENCV, DNN_TARGET_CPU)));
 #ifdef HAVE_CUDA
@@ -693,6 +749,7 @@ INSTANTIATE_TEST_CASE_P(/**/, Layer_ScatterND, testing::Values(std::make_tuple(D
 INSTANTIATE_TEST_CASE_P(/**/, Layer_LayerNorm, testing::Values(std::make_tuple(DNN_BACKEND_OPENCV, DNN_TARGET_CPU)));
 INSTANTIATE_TEST_CASE_P(/**/, Layer_LayerNormExpanded, testing::Values(std::make_tuple(DNN_BACKEND_OPENCV, DNN_TARGET_CPU)));
 INSTANTIATE_TEST_CASE_P(/**/, Layer_GatherElements, testing::Values(std::make_tuple(DNN_BACKEND_OPENCV, DNN_TARGET_CPU)));
+INSTANTIATE_TEST_CASE_P(/**/, Layer_InstanceNorm, testing::Values(std::make_tuple(DNN_BACKEND_OPENCV, DNN_TARGET_CPU)));
 
 
 typedef TestBaseWithParam<tuple<Vec4i, int, bool, tuple<Backend, Target> > > Layer_FullyConnected;
```

modules/dnn/src/cuda/mvn.cu

Lines changed: 28 additions & 0 deletions

```diff
@@ -66,6 +66,17 @@ namespace raw {
         output[idx] = (static_cast<float>(input[idx]) - means[outer_idx]) * scale[outer_idx];
     }
 }
+
+template <class T>
+__global__ void normalize_mean_variance_channelwise(Span<T> output, View<T> input, View<T> scale, View<T> bias, View<float> means, View<float> stdev, size_type inner_size, size_type C) {
+    for (auto idx : grid_stride_range(output.size())) {
+        const index_type outer_idx = idx / inner_size;
+        const index_type c = outer_idx % C;
+        auto s = static_cast<float>(scale[c]) * stdev[outer_idx];
+        auto b = static_cast<float>(bias[c]);
+        output[idx] = (static_cast<float>(input[idx]) - means[outer_idx]) * s + b;
+    }
+}
 }
 
 template <class T>
@@ -142,4 +153,21 @@ template void normalize_mean_variance(const Stream&, Span<__half>, View<__half>,
 #endif
 template void normalize_mean_variance(const Stream&, Span<float>, View<float>, View<float>, View<float>, std::size_t);
 
+template <class T>
+void normalize_mean_variance_channelwise(const Stream& stream, Span<T> output, View<T> input, View<T> scale, View<T> bias, View<float> means, View<float> stdev, std::size_t inner_size, std::size_t C)
+{
+    CV_Assert(input.size() == output.size());
+    CV_Assert(input.size() / inner_size == means.size());
+    CV_Assert(means.size() == stdev.size());
+
+    auto kernel = raw::normalize_mean_variance_channelwise<T>;
+    auto policy = make_policy(kernel, output.size(), 0, stream);
+    launch_kernel(kernel, policy, output, input, scale, bias, means, stdev, inner_size, C);
+}
+
+#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 530)
+template void normalize_mean_variance_channelwise(const Stream&, Span<__half> /*output*/, View<__half> /*input*/, View<__half> /*scale*/, View<__half> /*bias*/, View<float> /*means*/, View<float> /*stdev*/, std::size_t, std::size_t);
+#endif
+template void normalize_mean_variance_channelwise(const Stream&, Span<float> /*output*/, View<float> /*input*/, View<float> /*scale*/, View<float> /*bias*/, View<float> /*means*/, View<float> /*stdev*/, std::size_t, std::size_t);
+
 }}}} /* namespace cv::dnn::cuda4dnn::kernels */
```
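The kernel recovers the (instance, channel) pair from the flat element index purely arithmetically. A tiny host-side sketch of the same index math, assuming `inner_size = H*W` for an NCHW layout:

```cpp
#include <cassert>
#include <cstddef>

int main() {
    // Shape [N=2, C=3, H=4, W=5]; flat index -> (outer, channel) as in the kernel.
    const std::size_t C = 3, inner_size = 4 * 5;
    const std::size_t idx = 1 * (C * inner_size) + 2 * inner_size + 7;  // element (n=1, c=2, offset 7)
    const std::size_t outer_idx = idx / inner_size;  // which (n, c) plane: 1*3 + 2 = 5
    const std::size_t c = outer_idx % C;             // channel within the instance: 2
    assert(outer_idx == 5 && c == 2);
    return 0;
}
```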

modules/dnn/src/cuda4dnn/kernels/mvn.hpp

Lines changed: 3 additions & 0 deletions

```diff
@@ -26,6 +26,9 @@ void normalize_mean(const csl::Stream& stream, csl::Span<T> output, csl::View<T>
 template <class T>
 void normalize_mean_variance(const csl::Stream& stream, csl::Span<T> output, csl::View<T> input, csl::View<float> means, csl::View<float> scale, std::size_t inner_size);
 
+template <class T>
+void normalize_mean_variance_channelwise(const csl::Stream &stream, csl::Span<T> output, csl::View<T> input, csl::View<T> scale, csl::View<T> bias, csl::View<float> means, csl::View<float> stdev, std::size_t inner_size, std::size_t C);
+
 }}}} /* namespace cv::dnn::cuda4dnn::kernels */
 
 #endif /* OPENCV_DNN_SRC_CUDA4DNN_KERNELS_MVN_HPP */
```
modules/dnn/src/cuda4dnn/primitives/instance_norm.hpp (new file; path inferred from its include guard)

Lines changed: 86 additions & 0 deletions

```diff
@@ -0,0 +1,86 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#ifndef OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_INSTANCE_NORM_HPP
+#define OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_INSTANCE_NORM_HPP
+
+#include "../../op_cuda.hpp"
+
+#include "../csl/stream.hpp"
+#include "../csl/span.hpp"
+#include "../csl/tensor.hpp"
+#include "../csl/workspace.hpp"
+
+#include "../kernels/fill_copy.hpp"
+#include "../kernels/mvn.hpp"
+
+#include <opencv2/core.hpp>
+
+#include <cstddef>
+#include <vector>
+#include <utility>
+
+namespace cv { namespace dnn { namespace cuda4dnn {
+
+template <class T>
+class InstanceNormOp final : public CUDABackendNode {
+public:
+    using wrapper_type = GetCUDABackendWrapperType<T>;
+
+    InstanceNormOp(csl::Stream stream_, float epsilon_, size_t loops)
+        : stream(std::move(stream_)), epsilon(epsilon_) {
+        csl::WorkspaceBuilder builder;
+        builder.require<float>(loops);
+        builder.require<float>(loops);
+        scratch_mem_in_bytes = builder.required_workspace_size();
+    }
+
+    void forward(const std::vector<cv::Ptr<BackendWrapper>>& inputs,
+                 const std::vector<cv::Ptr<BackendWrapper>>& outputs,
+                 csl::Workspace& workspace) override {
+        auto input_wrapper = inputs[0].dynamicCast<wrapper_type>();
+        auto scale_wrapper = inputs[1].dynamicCast<wrapper_type>();
+        auto bias_wrapper = inputs[2].dynamicCast<wrapper_type>();
+
+        auto input = input_wrapper->getView();
+        auto scale = scale_wrapper->getView();
+        auto bias = bias_wrapper->getView();
+
+        auto output_wrapper = outputs[0].dynamicCast<wrapper_type>();
+        auto output = output_wrapper->getSpan();
+
+        auto C = input.get_axis_size(1);
+        auto loops = input.size_range(0, 2);
+        auto norm_size = input.size_range(2, input.rank());
+        if (norm_size == 1) {
+            kernels::fill<T>(stream, output, 0.f);
+            return;
+        } else {
+            auto ws_allocator = csl::WorkspaceAllocator(workspace);
+
+            auto mean = ws_allocator.get_span<float>(loops);
+            kernels::fill<float>(stream, mean, 0.f);
+
+            auto stdev = ws_allocator.get_span<float>(loops);
+            kernels::fill<float>(stream, stdev, 0.f);
+
+            kernels::reduce_mean_sqr_sum<T>(stream, mean, stdev, input, norm_size);
+            kernels::compute_normalization_scale(stream, stdev, mean, stdev, norm_size, epsilon);
+            kernels::normalize_mean_variance_channelwise<T>(stream, output, input, scale, bias, mean, stdev, norm_size, C);
+        }
+    }
+
+    std::size_t get_workspace_memory_in_bytes() const noexcept override { return scratch_mem_in_bytes; }
+
+private:
+    csl::Stream stream;
+
+    float epsilon;
+
+    std::size_t scratch_mem_in_bytes;
+};
+
+}}} // cv::dnn::cuda4dnn
+
+#endif // OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_INSTANCE_NORM_HPP
```
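The constructor budgets the workspace as two float spans of `loops = N*C` entries (per-instance mean and normalization scale). A rough back-of-the-envelope for the perf shape above, assuming no alignment padding from `WorkspaceBuilder`:

```cpp
#include <cstddef>
#include <cstdio>

int main() {
    // Perf shape from the PR: x = [2, 64, 180, 240] -> loops = N * C.
    const std::size_t N = 2, C = 64, loops = N * C;
    const std::size_t scratch = 2 * loops * sizeof(float);  // mean + stdev spans
    std::printf("workspace ~ %zu bytes\n", scratch);        // ~1024 bytes before padding
    return 0;
}
```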

modules/dnn/src/init.cpp

Lines changed: 1 addition & 0 deletions

```diff
@@ -160,6 +160,7 @@ void initializeLayerFactory()
     CV_DNN_REGISTER_LAYER_CLASS(GatherElements, GatherElementsLayer);
     CV_DNN_REGISTER_LAYER_CLASS(LayerNormalization, LayerNormLayer);
     CV_DNN_REGISTER_LAYER_CLASS(Expand, ExpandLayer);
+    CV_DNN_REGISTER_LAYER_CLASS(InstanceNormalization, InstanceNormLayer);
 
     CV_DNN_REGISTER_LAYER_CLASS(Crop, CropLayer);
     CV_DNN_REGISTER_LAYER_CLASS(Eltwise, EltwiseLayer);
```
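With the factory entry registered, ONNX graphs containing InstanceNormalization nodes should import through the regular ONNX path. A hedged end-to-end sketch; the model filename is a placeholder, not a file shipped with the PR:

```cpp
#include <opencv2/dnn.hpp>

int main() {
    // "model_with_instance_norm.onnx" is a placeholder path for illustration.
    cv::dnn::Net net = cv::dnn::readNetFromONNX("model_with_instance_norm.onnx");
    // Same NCHW shape as the PR's perf entry: [2, 64, 180, 240].
    cv::Mat input(std::vector<int>{2, 64, 180, 240}, CV_32F, cv::Scalar(0));
    net.setInput(input);
    cv::Mat out = net.forward();
    return 0;
}
```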

modules/dnn/src/layers/cpu_kernels/fast_norm.cpp

Lines changed: 4 additions & 3 deletions

```diff
@@ -118,10 +118,11 @@ void fastNorm(const Mat &input, const Mat &scale, const Mat &bias, Mat &output,
 
 void fastNormChannel(const Mat &input, const Mat &scale, const Mat &bias, Mat &output, float epsilon) {
     const auto input_shape = shape(input);
+    size_t N = input_shape[0], C = input_shape[1];
     CV_CheckEQ(scale.total(), bias.total(), "fastNormChannel: scale and bias should have the same shape");
+    CV_CheckEQ(scale.total(), C, "fastNormChannel: scale should be a 1d tensor and match the channel of input");
     CV_CheckGE(input.dims, 3, "fastNormChannel: input dimension >= 3");
 
-    size_t N = input_shape[0], C = input_shape[1];
     size_t loops = N * C,
            norm_size = static_cast<size_t>(total(input_shape, 2));
     float inv_norm_size = 1.0 / norm_size;
@@ -147,9 +148,9 @@ void fastNormChannel(const Mat &input, const Mat &scale, const Mat &bias, Mat &o
         float inv_stdev = 1.f / mean_square;
 
         size_t c = i % C;
-        float s = scale_data[c], b = bias_data[c];
+        float s = scale_data[c] * inv_stdev, b = bias_data[c];
         for (size_t j = 0; j < norm_size; j++) {
-            y[j] = s * (x[j] - mean) * inv_stdev + b;
+            y[j] = s * (x[j] - mean) + b;
        }
     }
 };
```
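The inner-loop change is a pure strength reduction: folding `inv_stdev` into the per-channel scale once per (n, c) plane drops one multiply per element without changing the result, up to floating-point reassociation. A quick check of the identity:

```cpp
#include <cassert>
#include <cmath>

int main() {
    // s * (x - mean) * inv_stdev + b  ==  (s * inv_stdev) * (x - mean) + b
    const float x = 0.7f, mean = 0.4f, s = 1.3f, inv_stdev = 2.5f, b = 0.1f;
    const float before = s * (x - mean) * inv_stdev + b;
    const float after  = (s * inv_stdev) * (x - mean) + b;
    assert(std::fabs(before - after) < 1e-6f);
    return 0;
}
```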

Comments (0)
