Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit e2d76a6

Browse files
author
Alexander Lyulkov
committed
Added DNN_TARGET_CPU_FP16 to type inference(fixed3)
1 parent a555121 · commit e2d76a6

File tree

4 files changed

+24
-6
lines changed

4 files changed

+24
-6
lines changed

‎modules/dnn/perf/perf_convolution.cpp‎

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -887,12 +887,17 @@ Net build_net(
887887

888888
MatShape netInputShape = shape(input);
889889
cv::dnn::MatType netInputType = input.depth();
890+
891+
bool fp16 = false;
890892
#ifdef HAVE_OPENCL
891-
bool fp16 = ocl::Device::getDefault().isExtensionSupported("cl_khr_fp16");
893+
fp16 = ocl::Device::getDefault().isExtensionSupported("cl_khr_fp16");
894+
#endif
895+
#if defined(__arm64__) && __arm64__
896+
fp16 = true;
897+
#endif
892898
if (netInputType == CV_32F && fp16
893899
&& (targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_CPU_FP16))
894900
netInputType = CV_16F;
895-
#endif
896901
size_t weightsMemory = 0, blobsMemory = 0;
897902
net.getMemoryConsumption(netInputShape, netInputType, weightsMemory, blobsMemory);
898903
int64 flops = net.getFLOPS(netInputShape, netInputType);

‎modules/dnn/perf/perf_convolution1d.cpp‎

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -137,12 +137,17 @@ PERF_TEST_P_(Conv1D, conv1d)
137137

138138
MatShape netInputShape = shape(input);
139139
cv::dnn::MatType netInputType = input.depth();
140+
141+
bool fp16 = false;
140142
#ifdef HAVE_OPENCL
141-
bool fp16 = ocl::Device::getDefault().isExtensionSupported("cl_khr_fp16");
143+
fp16 = ocl::Device::getDefault().isExtensionSupported("cl_khr_fp16");
144+
#endif
145+
#if defined(__arm64__) && __arm64__
146+
fp16 = true;
147+
#endif
142148
if (netInputType == CV_32F && fp16
143149
&& (targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_CPU_FP16))
144150
netInputType = CV_16F;
145-
#endif
146151
size_t weightsMemory = 0, blobsMemory = 0;
147152
net.getMemoryConsumption(netInputShape, netInputType, weightsMemory, blobsMemory);
148153
int64 flops = net.getFLOPS(netInputShape, netInputType);

‎modules/dnn/perf/perf_convolution3d.cpp‎

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -156,12 +156,17 @@ PERF_TEST_P_(Conv3D, conv3d)
156156

157157
MatShape netInputShape = shape(input);
158158
cv::dnn::MatType netInputType = input.depth();
159+
160+
bool fp16 = false;
159161
#ifdef HAVE_OPENCL
160-
bool fp16 = ocl::Device::getDefault().isExtensionSupported("cl_khr_fp16");
162+
fp16 = ocl::Device::getDefault().isExtensionSupported("cl_khr_fp16");
163+
#endif
164+
#if defined(__arm64__) && __arm64__
165+
fp16 = true;
166+
#endif
161167
if (netInputType == CV_32F && fp16
162168
&& (targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_CPU_FP16))
163169
netInputType = CV_16F;
164-
#endif
165170
size_t weightsMemory = 0, blobsMemory = 0;
166171
net.getMemoryConsumption(netInputShape, netInputType, weightsMemory, blobsMemory);
167172
int64 flops = net.getFLOPS(netInputShape, netInputType);

‎modules/dnn/perf/perf_net.cpp‎

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,9 @@ class DNNTestNetwork : public ::perf::TestBaseWithParam< tuple<Backend, Target>
5151
bool fp16 = false;
5252
#ifdef HAVE_OPENCL
5353
fp16 = ocl::Device::getDefault().isExtensionSupported("cl_khr_fp16");
54+
#endif
55+
#if defined(__arm64__) && __arm64__
56+
fp16 = true;
5457
#endif
5558
std::vector<cv::dnn::MatType> netMatTypes;
5659
for (auto& inp : inputs) {

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp