Commit ee0822d

Merge pull request #24378 from fengyuentau:instance_norm

dnn onnx: add instance norm layer #24378

Resolves #24377
Relates #24092 (comment)

| Perf | multi-thread | single-thread |
| - | - | - |
| x: [2, 64, 180, 240] | 3.95ms | 11.12ms |

Todo:

- [x] speed up by multi-threading
- [x] add perf
- [x] add backend: OpenVINO
- [x] add backend: CUDA
- [x] add backend: OpenCL (no fp16)
- [ ] add backend: CANN (will be done via #24462)

### Pull Request Readiness Checklist

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [x] I agree to contribute to the project under the Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on code under the GPL or another license that is incompatible with OpenCV.
- [x] The PR is proposed to the proper branch.
- [x] There is a reference to the original bug report and related work.
- [x] There are accuracy tests, performance tests and test data in the opencv_extra repository, if applicable. The patch to opencv_extra has the same branch name.
- [x] The feature is well documented and sample code can be built with the project CMake.

```
force_builders=Linux OpenCL,Win64 OpenCL,Custom
buildworker:Custom=linux-4
build_image:Custom=ubuntu:18.04
modules_filter:Custom=none
disable_ipp:Custom=ON
```

1 parent 832f738 · commit ee0822d
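For orientation, instance normalization standardizes each (n, c) spatial plane of an NCHW tensor independently, then applies a per-channel affine transform. Below is a minimal standalone C++ sketch of that math — a reference illustration mirroring the CPU kernel changed in this PR, not code from the patch; `instanceNormRef` and its signature are illustrative only:

```cpp
#include <cmath>
#include <cstddef>
#include <vector>

// Reference instance norm on an NCHW float tensor:
// y[n,c,:,:] = scale[c] * (x[n,c,:,:] - mean) / sqrt(var + epsilon) + bias[c]
void instanceNormRef(const std::vector<float>& x, std::vector<float>& y,
                     const std::vector<float>& scale, const std::vector<float>& bias,
                     size_t N, size_t C, size_t H, size_t W, float epsilon) {
    const size_t plane = H * W;
    y.resize(x.size());
    for (size_t n = 0; n < N; n++) {
        for (size_t c = 0; c < C; c++) {
            const float* xp = x.data() + (n * C + c) * plane;
            float* yp = y.data() + (n * C + c) * plane;
            // mean and variance over the spatial plane of this (n, c) pair
            float mean = 0.f, sqmean = 0.f;
            for (size_t i = 0; i < plane; i++) { mean += xp[i]; sqmean += xp[i] * xp[i]; }
            mean /= plane;
            const float var = sqmean / plane - mean * mean;
            // fold the inverse stdev into the per-channel scale, as the CPU kernel does
            const float s = scale[c] / std::sqrt(var + epsilon);
            for (size_t i = 0; i < plane; i++)
                yp[i] = s * (xp[i] - mean) + bias[c];
        }
    }
}
```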

File tree

10 files changed: +454 −43 lines changed


modules/dnn/include/opencv2/dnn/all_layers.hpp

Lines changed: 7 additions & 0 deletions

```diff
@@ -1166,6 +1166,13 @@ CV__DNN_INLINE_NS_BEGIN
     static Ptr<ExpandLayer> create(const LayerParams &params);
 };
 
+class CV_EXPORTS InstanceNormLayer : public Layer {
+public:
+    float epsilon;
+
+    static Ptr<InstanceNormLayer> create(const LayerParams &params);
+};
+
 //! @}
 //! @}
 CV__DNN_INLINE_NS_END
```
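The header only exposes `epsilon` and a factory. As a hedged usage sketch, the layer could be constructed directly through `LayerParams`; treating `"epsilon"` as the parameter key mirrors the ONNX attribute name but is an assumption, and `makeInstanceNorm` is a hypothetical helper:

```cpp
#include <opencv2/dnn.hpp>
#include <opencv2/dnn/all_layers.hpp>

// Hypothetical helper: builds the layer directly; "epsilon" as a LayerParams
// key is an assumption mirroring the ONNX InstanceNormalization attribute.
cv::Ptr<cv::dnn::InstanceNormLayer> makeInstanceNorm()
{
    cv::dnn::LayerParams lp;
    lp.type = "InstanceNormalization";
    lp.name = "instance_norm1";
    lp.set("epsilon", 1e-5);  // assumed parameter name
    return cv::dnn::InstanceNormLayer::create(lp);
}
```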

modules/dnn/perf/perf_layer.cpp

Lines changed: 57 additions & 0 deletions

```diff
@@ -683,6 +683,62 @@ PERF_TEST_P_(Layer_GatherElements, GatherElements)
     test_layer({2700, 1, 2914}, {2700, 1, 81}, 2);
 }
 
+struct Layer_InstanceNorm : public TestBaseWithParam<tuple<Backend, Target> >
+{
+    void test_layer(const std::vector<int>& x_shape)
+    {
+        int backendId = get<0>(GetParam());
+        int targetId = get<1>(GetParam());
+
+        Mat x(x_shape, CV_32FC1);
+        Mat scale(x_shape[1], 1, CV_32FC1);
+        Mat b(x_shape[1], 1, CV_32FC1);
+
+        randu(x, 0.f, 1.f);
+        randu(scale, 0.f, 1.f);
+        randu(b, 0.f, 1.f);
+
+        Net net;
+        LayerParams lp;
+        lp.type = "InstanceNormalization";
+        lp.name = "testLayer";
+        int id = net.addLayerToPrev(lp.name, lp.type, lp);
+        net.connect(0, 0, id, 0);
+        net.connect(0, 1, id, 1);
+        net.connect(0, 2, id, 2);
+
+        // warmup
+        {
+            std::vector<String> inpNames{"x", "scale", "b"};
+            net.setInputsNames(inpNames);
+            net.setInput(x, inpNames[0]);
+            net.setInput(scale, inpNames[1]);
+            net.setInput(b, inpNames[2]);
+
+            net.setPreferableBackend(backendId);
+            net.setPreferableTarget(targetId);
+            Mat out = net.forward();
+        }
+
+        TEST_CYCLE()
+        {
+            Mat res = net.forward();
+        }
+
+        SANITY_CHECK_NOTHING();
+    }
+
+    int N = 2;
+    int C = 64;
+    int H = 180;
+    int W = 240;
+};
+
+PERF_TEST_P_(Layer_InstanceNorm, InstanceNorm)
+{
+    test_layer({N, C, H, W});
+}
+
 INSTANTIATE_TEST_CASE_P(/**/, Layer_Slice, dnnBackendsAndTargets(false, false));
 INSTANTIATE_TEST_CASE_P(/**/, Layer_NaryEltwise, testing::Values(std::make_tuple(DNN_BACKEND_OPENCV, DNN_TARGET_CPU)));
 #ifdef HAVE_CUDA
@@ -693,6 +749,7 @@ INSTANTIATE_TEST_CASE_P(/**/, Layer_ScatterND, testing::Values(std::make_tuple(D
 INSTANTIATE_TEST_CASE_P(/**/, Layer_LayerNorm, testing::Values(std::make_tuple(DNN_BACKEND_OPENCV, DNN_TARGET_CPU)));
 INSTANTIATE_TEST_CASE_P(/**/, Layer_LayerNormExpanded, testing::Values(std::make_tuple(DNN_BACKEND_OPENCV, DNN_TARGET_CPU)));
 INSTANTIATE_TEST_CASE_P(/**/, Layer_GatherElements, testing::Values(std::make_tuple(DNN_BACKEND_OPENCV, DNN_TARGET_CPU)));
+INSTANTIATE_TEST_CASE_P(/**/, Layer_InstanceNorm, testing::Values(std::make_tuple(DNN_BACKEND_OPENCV, DNN_TARGET_CPU)));
 
 
 typedef TestBaseWithParam<tuple<Vec4i, int, bool, tuple<Backend, Target> > > Layer_FullyConnected;
```

modules/dnn/src/cuda/mvn.cu

Lines changed: 28 additions & 0 deletions

```diff
@@ -66,6 +66,17 @@ namespace raw {
         output[idx] = (static_cast<float>(input[idx]) - means[outer_idx]) * scale[outer_idx];
     }
 }
+
+template <class T>
+__global__ void normalize_mean_variance_channelwise(Span<T> output, View<T> input, View<T> scale, View<T> bias, View<float> means, View<float> stdev, size_type inner_size, size_type C) {
+    for (auto idx : grid_stride_range(output.size())) {
+        const index_type outer_idx = idx / inner_size;
+        const index_type c = outer_idx % C;
+        auto s = static_cast<float>(scale[c]) * stdev[outer_idx];
+        auto b = static_cast<float>(bias[c]);
+        output[idx] = (static_cast<float>(input[idx]) - means[outer_idx]) * s + b;
+    }
+}
 }
 
 template <class T>
@@ -142,4 +153,21 @@ template void normalize_mean_variance(const Stream&, Span<__half>, View<__half>,
 #endif
 template void normalize_mean_variance(const Stream&, Span<float>, View<float>, View<float>, View<float>, std::size_t);
 
+template <class T>
+void normalize_mean_variance_channelwise(const Stream& stream, Span<T> output, View<T> input, View<T> scale, View<T> bias, View<float> means, View<float> stdev, std::size_t inner_size, std::size_t C)
+{
+    CV_Assert(input.size() == output.size());
+    CV_Assert(input.size() / inner_size == means.size());
+    CV_Assert(means.size() == stdev.size());
+
+    auto kernel = raw::normalize_mean_variance_channelwise<T>;
+    auto policy = make_policy(kernel, output.size(), 0, stream);
+    launch_kernel(kernel, policy, output, input, scale, bias, means, stdev, inner_size, C);
+}
+
+#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 530)
+template void normalize_mean_variance_channelwise(const Stream&, Span<__half> /*output*/, View<__half> /*input*/, View<__half> /*scale*/, View<__half> /*bias*/, View<float> /*means*/, View<float> /*stdev*/, std::size_t, std::size_t);
+#endif
+template void normalize_mean_variance_channelwise(const Stream&, Span<float> /*output*/, View<float> /*input*/, View<float> /*scale*/, View<float> /*bias*/, View<float> /*means*/, View<float> /*stdev*/, std::size_t, std::size_t);
+
 }}}} /* namespace cv::dnn::cuda4dnn::kernels */
```
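The kernel recovers the (instance, channel) pair from the flat element index purely arithmetically. A tiny host-side sketch of the same index math, assuming `inner_size = H*W` for an NCHW layout:

```cpp
#include <cassert>
#include <cstddef>

int main() {
    // Shape [N=2, C=3, H=4, W=5]; flat index -> (outer, channel) as in the kernel.
    const std::size_t C = 3, inner_size = 4 * 5;
    const std::size_t idx = 1 * (C * inner_size) + 2 * inner_size + 7;  // element (n=1, c=2, offset 7)
    const std::size_t outer_idx = idx / inner_size;  // which (n, c) plane: 1*3 + 2 = 5
    const std::size_t c = outer_idx % C;             // channel within the instance: 2
    assert(outer_idx == 5 && c == 2);
    return 0;
}
```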

modules/dnn/src/cuda4dnn/kernels/mvn.hpp

Lines changed: 3 additions & 0 deletions

```diff
@@ -26,6 +26,9 @@ void normalize_mean(const csl::Stream& stream, csl::Span<T> output, csl::View<T>
 template <class T>
 void normalize_mean_variance(const csl::Stream& stream, csl::Span<T> output, csl::View<T> input, csl::View<float> means, csl::View<float> scale, std::size_t inner_size);
 
+template <class T>
+void normalize_mean_variance_channelwise(const csl::Stream &stream, csl::Span<T> output, csl::View<T> input, csl::View<T> scale, csl::View<T> bias, csl::View<float> means, csl::View<float> stdev, std::size_t inner_size, std::size_t C);
+
 }}}} /* namespace cv::dnn::cuda4dnn::kernels */
 
 #endif /* OPENCV_DNN_SRC_CUDA4DNN_KERNELS_MVN_HPP */
```
modules/dnn/src/cuda4dnn/primitives/instance_norm.hpp (new file; path inferred from its include guard)

Lines changed: 86 additions & 0 deletions

```diff
@@ -0,0 +1,86 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#ifndef OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_INSTANCE_NORM_HPP
+#define OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_INSTANCE_NORM_HPP
+
+#include "../../op_cuda.hpp"
+
+#include "../csl/stream.hpp"
+#include "../csl/span.hpp"
+#include "../csl/tensor.hpp"
+#include "../csl/workspace.hpp"
+
+#include "../kernels/fill_copy.hpp"
+#include "../kernels/mvn.hpp"
+
+#include <opencv2/core.hpp>
+
+#include <cstddef>
+#include <vector>
+#include <utility>
+
+namespace cv { namespace dnn { namespace cuda4dnn {
+
+template <class T>
+class InstanceNormOp final : public CUDABackendNode {
+public:
+    using wrapper_type = GetCUDABackendWrapperType<T>;
+
+    InstanceNormOp(csl::Stream stream_, float epsilon_, size_t loops)
+        : stream(std::move(stream_)), epsilon(epsilon_) {
+        csl::WorkspaceBuilder builder;
+        builder.require<float>(loops);
+        builder.require<float>(loops);
+        scratch_mem_in_bytes = builder.required_workspace_size();
+    }
+
+    void forward(const std::vector<cv::Ptr<BackendWrapper>>& inputs,
+                 const std::vector<cv::Ptr<BackendWrapper>>& outputs,
+                 csl::Workspace& workspace) override {
+        auto input_wrapper = inputs[0].dynamicCast<wrapper_type>();
+        auto scale_wrapper = inputs[1].dynamicCast<wrapper_type>();
+        auto bias_wrapper = inputs[2].dynamicCast<wrapper_type>();
+
+        auto input = input_wrapper->getView();
+        auto scale = scale_wrapper->getView();
+        auto bias = bias_wrapper->getView();
+
+        auto output_wrapper = outputs[0].dynamicCast<wrapper_type>();
+        auto output = output_wrapper->getSpan();
+
+        auto C = input.get_axis_size(1);
+        auto loops = input.size_range(0, 2);
+        auto norm_size = input.size_range(2, input.rank());
+        if (norm_size == 1) {
+            kernels::fill<T>(stream, output, 0.f);
+            return;
+        } else {
+            auto ws_allocator = csl::WorkspaceAllocator(workspace);
+
+            auto mean = ws_allocator.get_span<float>(loops);
+            kernels::fill<float>(stream, mean, 0.f);
+
+            auto stdev = ws_allocator.get_span<float>(loops);
+            kernels::fill<float>(stream, stdev, 0.f);
+
+            kernels::reduce_mean_sqr_sum<T>(stream, mean, stdev, input, norm_size);
+            kernels::compute_normalization_scale(stream, stdev, mean, stdev, norm_size, epsilon);
+            kernels::normalize_mean_variance_channelwise<T>(stream, output, input, scale, bias, mean, stdev, norm_size, C);
+        }
+    }
+
+    std::size_t get_workspace_memory_in_bytes() const noexcept override { return scratch_mem_in_bytes; }
+
+private:
+    csl::Stream stream;
+
+    float epsilon;
+
+    std::size_t scratch_mem_in_bytes;
+};
+
+}}} // cv::dnn::cuda4dnn
+
+#endif // OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_INSTANCE_NORM_HPP
```
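The constructor budgets the workspace as two float spans of `loops = N*C` entries (per-instance mean and normalization scale). A rough back-of-the-envelope for the perf shape above, assuming no alignment padding from `WorkspaceBuilder`:

```cpp
#include <cstddef>
#include <cstdio>

int main() {
    // Perf shape from the PR: x = [2, 64, 180, 240] -> loops = N * C.
    const std::size_t N = 2, C = 64, loops = N * C;
    const std::size_t scratch = 2 * loops * sizeof(float);  // mean + stdev spans
    std::printf("workspace ~ %zu bytes\n", scratch);        // ~1024 bytes before padding
    return 0;
}
```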

modules/dnn/src/init.cpp

Lines changed: 1 addition & 0 deletions

```diff
@@ -160,6 +160,7 @@ void initializeLayerFactory()
     CV_DNN_REGISTER_LAYER_CLASS(GatherElements, GatherElementsLayer);
     CV_DNN_REGISTER_LAYER_CLASS(LayerNormalization, LayerNormLayer);
     CV_DNN_REGISTER_LAYER_CLASS(Expand, ExpandLayer);
+    CV_DNN_REGISTER_LAYER_CLASS(InstanceNormalization, InstanceNormLayer);
 
     CV_DNN_REGISTER_LAYER_CLASS(Crop, CropLayer);
     CV_DNN_REGISTER_LAYER_CLASS(Eltwise, EltwiseLayer);
```
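With the factory entry registered, ONNX graphs containing InstanceNormalization nodes should import through the regular ONNX path. A hedged end-to-end sketch; the model filename is a placeholder, not a file shipped with the PR:

```cpp
#include <opencv2/dnn.hpp>

int main() {
    // "model_with_instance_norm.onnx" is a placeholder path for illustration.
    cv::dnn::Net net = cv::dnn::readNetFromONNX("model_with_instance_norm.onnx");
    // Same NCHW shape as the PR's perf entry: [2, 64, 180, 240].
    cv::Mat input(std::vector<int>{2, 64, 180, 240}, CV_32F, cv::Scalar(0));
    net.setInput(input);
    cv::Mat out = net.forward();
    return 0;
}
```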

modules/dnn/src/layers/cpu_kernels/fast_norm.cpp

Lines changed: 4 additions & 3 deletions

```diff
@@ -118,10 +118,11 @@ void fastNorm(const Mat &input, const Mat &scale, const Mat &bias, Mat &output,
 
 void fastNormChannel(const Mat &input, const Mat &scale, const Mat &bias, Mat &output, float epsilon) {
     const auto input_shape = shape(input);
+    size_t N = input_shape[0], C = input_shape[1];
     CV_CheckEQ(scale.total(), bias.total(), "fastNormChannel: scale and bias should have the same shape");
+    CV_CheckEQ(scale.total(), C, "fastNormChannel: scale should be a 1d tensor and match the channel of input");
     CV_CheckGE(input.dims, 3, "fastNormChannel: input dimension >= 3");
 
-    size_t N = input_shape[0], C = input_shape[1];
     size_t loops = N * C,
            norm_size = static_cast<size_t>(total(input_shape, 2));
     float inv_norm_size = 1.0 / norm_size;
@@ -147,9 +148,9 @@ void fastNormChannel(const Mat &input, const Mat &scale, const Mat &bias, Mat &o
         float inv_stdev = 1.f / mean_square;
 
         size_t c = i % C;
-        float s = scale_data[c], b = bias_data[c];
+        float s = scale_data[c] * inv_stdev, b = bias_data[c];
         for (size_t j = 0; j < norm_size; j++) {
-            y[j] = s * (x[j] - mean) * inv_stdev + b;
+            y[j] = s * (x[j] - mean) + b;
        }
     }
 };
```
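The inner-loop change is a pure strength reduction: folding `inv_stdev` into the per-channel scale once per (n, c) plane drops one multiply per element without changing the result, up to floating-point reassociation. A quick check of the identity:

```cpp
#include <cassert>
#include <cmath>

int main() {
    // s * (x - mean) * inv_stdev + b  ==  (s * inv_stdev) * (x - mean) + b
    const float x = 0.7f, mean = 0.4f, s = 1.3f, inv_stdev = 2.5f, b = 0.1f;
    const float before = s * (x - mean) * inv_stdev + b;
    const float after  = (s * inv_stdev) * (x - mean) + b;
    assert(std::fabs(before - after) < 1e-6f);
    return 0;
}
```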

Comments (0)
