Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Recent HAL changes ported to 4.9#26395

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
savuor wants to merge 15 commits into opencv:4.x from savuor:rv/fastcv_hal_4.9
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
15 commits
Select commit. Hold shift + click to select a range
67fe2ba
Merge pull request #25789 from asmorkalov:as/HAL_meanStdDev_tails
asmorkalovJun 27, 2024
e8cc13c
Merge pull request #25789 from asmorkalov:as/HAL_meanStdDev_tails
asmorkalovJun 27, 2024
b25db18
Relax equalizeHist test for some HAL implementations.
asmorkalovJun 27, 2024
e5f2be8
Use Carotene implementation of TEGRA_GaussianBlurBinomial 3x3 and 5x5…
asmorkalovJun 20, 2024
153a50a
Merge pull request #25792 from asmorkalov:as/HAL_fast_GaussianBlur
asmorkalovJul 12, 2024
e376ee7
Merge pull request #25936 from savuor:rv/hal_dot
savuorJul 23, 2024
ff8afe8
Report used HAL to test log and xml
asmorkalovJul 23, 2024
2f6c235
Merge pull request #25970 from savuor:rv/hal_pyrdown
savuorAug 6, 2024
a242431
Got rid of CAROTENE_NEON_ARCH and use standard __ARM_ARCH check.
asmorkalovAug 30, 2024
14e4be0
Merge pull request #26080 from asmorkalov:as/HAL_minMaxIdx_ND_offset
asmorkalovAug 30, 2024
a8f2b2b
Excluded nullptr leak to arithmetic HAL got from empty Mat.
asmorkalovSep 6, 2024
5a2e561
Merge pull request #26143 from asmorkalov:as/HAL_opticalFlowLK
asmorkalovSep 16, 2024
ba340ee
Merge pull request #26163 from asmorkalov:as/HAL_schaar_deriv
asmorkalovSep 23, 2024
5ee4f59
OpenCV Acceleration with FastCV HAL changes
sssanjee-quicOct 16, 2024
6f907fb
Fastcv HAL changes for Opencv Acceleration
sssanjee-quicOct 25, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions3rdparty/carotene/CMakeLists.txt
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -52,5 +52,9 @@ if(WITH_NEON)
endif()
endif()

# MinGW builds define _USE_MATH_DEFINES for the carotene object library
# (presumably so that <math.h> exposes the M_PI-style constants -- confirm).
# target_compile_definitions() expects bare NAME=VALUE items; a leading "-D"
# is only tolerated for compatibility and is stripped by CMake.
if(MINGW)
  target_compile_definitions(carotene_objs PRIVATE _USE_MATH_DEFINES=1)
endif()

# Static library "carotene" assembled from the objects of carotene_objs.
# dummy.cpp is listed as an additional regular source file to fix the Xcode build.
add_library(carotene STATIC ${OPENCV_3RDPARTY_EXCLUDE_FROM_ALL} "$<TARGET_OBJECTS:carotene_objs>" "${CAROTENE_SOURCE_DIR}/dummy.cpp")
121 changes: 120 additions & 1 deletion3rdparty/carotene/hal/tegra_hal.hpp
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -1286,7 +1286,6 @@ inline int TEGRA_SEPFILTERFREE(cvhalFilter2D *context)
#undef cv_hal_sepFilterFree
#define cv_hal_sepFilterFree TEGRA_SEPFILTERFREE


struct MorphCtx
{
int operation;
Expand DownExpand Up@@ -1857,6 +1856,126 @@ TegraCvtColor_Invoker(bgrx2hsvf, bgrx2hsv, src_data + static_cast<size_t>(range.
#define cv_hal_cvtTwoPlaneYUVtoBGREx TEGRA_CVT2PYUVTOBGR_EX
#endif

// The optimized branch was developed for old armv7 processors and leads to perf degradation on armv8
#if defined(__ARM_ARCH) && (__ARM_ARCH == 7)
// Maps an OpenCV HAL border constant (CV_HAL_BORDER_*) to the matching
// Carotene BORDER_MODE value.
// Any border type without a counterpart listed here yields BORDER_MODE_UNDEFINED.
inline CAROTENE_NS::BORDER_MODE borderCV2Carotene(int borderType)
{
    if (borderType == CV_HAL_BORDER_CONSTANT)
        return CAROTENE_NS::BORDER_MODE_CONSTANT;
    if (borderType == CV_HAL_BORDER_REPLICATE)
        return CAROTENE_NS::BORDER_MODE_REPLICATE;
    if (borderType == CV_HAL_BORDER_REFLECT)
        return CAROTENE_NS::BORDER_MODE_REFLECT;
    if (borderType == CV_HAL_BORDER_WRAP)
        return CAROTENE_NS::BORDER_MODE_WRAP;
    if (borderType == CV_HAL_BORDER_REFLECT_101)
        return CAROTENE_NS::BORDER_MODE_REFLECT101;

    // No Carotene equivalent for this border type.
    return CAROTENE_NS::BORDER_MODE_UNDEFINED;
}

inline int TEGRA_GaussianBlurBinomial(const uchar* src_data, size_t src_step, uchar* dst_data, size_t dst_step,
int width, int height, int depth, int cn, size_t margin_left, size_t margin_top,
size_t margin_right, size_t margin_bottom, size_t ksize, int border_type)
{
CAROTENE_NS::Size2D sz(width, height);
CAROTENE_NS::BORDER_MODE border = borderCV2Carotene(border_type);
CAROTENE_NS::Margin mg(margin_left, margin_right, margin_top, margin_bottom);

if (ksize == 3)
{
if ((depth != CV_8U) || (cn != 1))
return CV_HAL_ERROR_NOT_IMPLEMENTED;

if (CAROTENE_NS::isGaussianBlur3x3MarginSupported(sz, border, mg))
{
CAROTENE_NS::gaussianBlur3x3Margin(sz, src_data, src_step, dst_data, dst_step,
border, 0, mg);
return CV_HAL_ERROR_OK;
}
}
else if (ksize == 5)
{
if (!CAROTENE_NS::isGaussianBlur5x5Supported(sz, cn, border))
return CV_HAL_ERROR_NOT_IMPLEMENTED;

if (depth == CV_8U)
{
CAROTENE_NS::gaussianBlur5x5(sz, cn, (uint8_t*)src_data, src_step,
(uint8_t*)dst_data, dst_step, border, 0, mg);
return CV_HAL_ERROR_OK;
}
else if (depth == CV_16U)
{
CAROTENE_NS::gaussianBlur5x5(sz, cn, (uint16_t*)src_data, src_step,
(uint16_t*)dst_data, dst_step, border, 0, mg);
return CV_HAL_ERROR_OK;
}
else if (depth == CV_16S)
{
CAROTENE_NS::gaussianBlur5x5(sz, cn, (int16_t*)src_data, src_step,
(int16_t*)dst_data, dst_step, border, 0, mg);
return CV_HAL_ERROR_OK;
}
}

return CV_HAL_ERROR_NOT_IMPLEMENTED;
}

#undef cv_hal_gaussianBlurBinomial
#define cv_hal_gaussianBlurBinomial TEGRA_GaussianBlurBinomial

#endif // __ARM_ARCH=7

#endif // OPENCV_IMGPROC_HAL_INTERFACE_H

// The optimized branch was developed for old armv7 processors
#if defined(__ARM_ARCH) && (__ARM_ARCH == 7)
inline int TEGRA_LKOpticalFlowLevel(const uchar *prev_data, size_t prev_data_step,
const short* prev_deriv_data, size_t prev_deriv_step,
const uchar* next_data, size_t next_step,
int width, int height, int cn,
const float *prev_points, float *next_points, size_t point_count,
uchar *status, float *err,
const int win_width, const int win_height,
int termination_count, double termination_epsilon,
bool get_min_eigen_vals,
float min_eigen_vals_threshold)
{
if (!CAROTENE_NS::isSupportedConfiguration())
return CV_HAL_ERROR_NOT_IMPLEMENTED;

CAROTENE_NS::pyrLKOptFlowLevel(CAROTENE_NS::Size2D(width, height), cn,
prev_data, prev_data_step, prev_deriv_data, prev_deriv_step,
next_data, next_step,
point_count, prev_points, next_points,
status, err, CAROTENE_NS::Size2D(win_width, win_height),
termination_count, termination_epsilon,
get_min_eigen_vals, min_eigen_vals_threshold);
return CV_HAL_ERROR_OK;
}

#undef cv_hal_LKOpticalFlowLevel
#define cv_hal_LKOpticalFlowLevel TEGRA_LKOpticalFlowLevel
#endif // __ARM_ARCH=7

#if 0 // OpenCV provides faster parallel implementation
// HAL hook that forwards the Scharr derivative computation to
// CAROTENE_NS::ScharrDeriv. Compiled out on purpose (see the note above).
//
// Returns CV_HAL_ERROR_NOT_IMPLEMENTED when Carotene reports that the current
// configuration is not supported, CV_HAL_ERROR_OK otherwise.
inline int TEGRA_ScharrDeriv(const uchar* src_data, size_t src_step,
                             short* dst_data, size_t dst_step,
                             int width, int height, int cn)
{
    if (CAROTENE_NS::isSupportedConfiguration())
    {
        const CAROTENE_NS::Size2D frameSize(width, height);
        CAROTENE_NS::ScharrDeriv(frameSize, cn, src_data, src_step, dst_data, dst_step);
        return CV_HAL_ERROR_OK;
    }

    return CV_HAL_ERROR_NOT_IMPLEMENTED;
}

#undef cv_hal_ScharrDeriv
#define cv_hal_ScharrDeriv TEGRA_ScharrDeriv
#endif

#endif
2 changes: 1 addition & 1 deletion3rdparty/carotene/include/carotene/functions.hpp
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -2485,7 +2485,7 @@ namespace CAROTENE_NS {
u8 *status, f32 *err,
const Size2D &winSize,
u32 terminationCount, f64 terminationEpsilon,
u32 level, u32 maxLevel, bool useInitialFlow,bool getMinEigenVals,
bool getMinEigenVals,
f32 minEigThreshold);
}

Expand Down
11 changes: 0 additions & 11 deletions3rdparty/carotene/src/common.hpp
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -58,17 +58,6 @@

namespace CAROTENE_NS { namespace internal {

#ifndef CAROTENE_NEON_ARCH
# if defined(__aarch64__) || defined(__aarch32__)
# define CAROTENE_NEON_ARCH 8
# else
# define CAROTENE_NEON_ARCH 7
# endif
#endif
#if ( !defined(__aarch64__) && !defined(__aarch32__) ) && (CAROTENE_NEON_ARCH == 8 )
# error("ARMv7 doen't support A32/A64 Neon instructions")
#endif

inline void prefetch(const void *ptr, size_t offset = 32*10)
{
#if defined __GNUC__
Expand Down
53 changes: 12 additions & 41 deletions3rdparty/carotene/src/opticalflow.cpp
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -58,7 +58,7 @@ void pyrLKOptFlowLevel(const Size2D &size, s32 cn,
u8 *status, f32 *err,
const Size2D &winSize,
u32 terminationCount, f64 terminationEpsilon,
u32 level, u32 maxLevel, bool useInitialFlow,bool getMinEigenVals,
bool getMinEigenVals,
f32 minEigThreshold)
{
internal::assertSupportedConfiguration();
Expand All@@ -74,32 +74,11 @@ void pyrLKOptFlowLevel(const Size2D &size, s32 cn,

for( u32 ptidx = 0; ptidx < ptCount; ptidx++ )
{
f32 levscale = (1./(1 << level));
u32 ptref = ptidx << 1;
f32 prevPtX = prevPts[ptref+0]*levscale;
f32 prevPtY = prevPts[ptref+1]*levscale;
f32 nextPtX;
f32 nextPtY;
if( level == maxLevel )
{
if( useInitialFlow )
{
nextPtX = nextPts[ptref+0]*levscale;
nextPtY = nextPts[ptref+1]*levscale;
}
else
{
nextPtX = prevPtX;
nextPtY = prevPtY;
}
}
else
{
nextPtX = nextPts[ptref+0]*2.f;
nextPtY = nextPts[ptref+1]*2.f;
}
nextPts[ptref+0] = nextPtX;
nextPts[ptref+1] = nextPtY;
f32 prevPtX = prevPts[ptref+0];
f32 prevPtY = prevPts[ptref+1];
f32 nextPtX = nextPts[ptref+0];
f32 nextPtY = nextPts[ptref+1];

s32 iprevPtX, iprevPtY;
s32 inextPtX, inextPtY;
Expand All@@ -111,13 +90,10 @@ void pyrLKOptFlowLevel(const Size2D &size, s32 cn,
if( iprevPtX < -(s32)winSize.width || iprevPtX >= (s32)size.width ||
iprevPtY < -(s32)winSize.height || iprevPtY >= (s32)size.height )
{
if( level == 0 )
{
if( status )
status[ptidx] = false;
if( err )
err[ptidx] = 0;
}
if( status )
status[ptidx] = false;
if( err )
err[ptidx] = 0;
continue;
}

Expand DownExpand Up@@ -333,7 +309,7 @@ void pyrLKOptFlowLevel(const Size2D &size, s32 cn,

if( minEig < minEigThreshold || D < FLT_EPSILON )
{
if(level == 0 &&status )
if( status )
status[ptidx] = false;
continue;
}
Expand All@@ -353,7 +329,7 @@ void pyrLKOptFlowLevel(const Size2D &size, s32 cn,
if( inextPtX < -(s32)winSize.width || inextPtX >= (s32)size.width ||
inextPtY < -(s32)winSize.height || inextPtY >= (s32)size.height )
{
if(level == 0 &&status )
if( status )
status[ptidx] = false;
break;
}
Expand DownExpand Up@@ -469,8 +445,7 @@ void pyrLKOptFlowLevel(const Size2D &size, s32 cn,
prevDeltaX = deltaX;
prevDeltaY = deltaY;
}

if( status && status[ptidx] && err && level == 0 && !getMinEigenVals )
if( status && status[ptidx] && err && !getMinEigenVals )
{
f32 nextPointX = nextPts[ptref+0] - halfWinX;
f32 nextPointY = nextPts[ptref+1] - halfWinY;
Expand DownExpand Up@@ -526,14 +501,10 @@ void pyrLKOptFlowLevel(const Size2D &size, s32 cn,
(void)winSize;
(void)terminationCount;
(void)terminationEpsilon;
(void)level;
(void)maxLevel;
(void)useInitialFlow;
(void)getMinEigenVals;
(void)minEigThreshold;
(void)ptCount;
#endif
}

}//CAROTENE_NS

8 changes: 4 additions & 4 deletions3rdparty/carotene/src/vround_helper.hpp
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -57,7 +57,7 @@ namespace CAROTENE_NS { namespace internal {

inline uint32x4_t vroundq_u32_f32(const float32x4_t val)
{
#if CAROTENE_NEON_ARCH >= 8 /* get ready for ARMv9 */
#if defined(__ARM_ARCH) && (__ARM_ARCH >= 8)
return vcvtnq_u32_f32(val);
#else
const float32x4_t delta = vdupq_n_f32(CAROTENE_ROUND_DELTA);
Expand All@@ -67,7 +67,7 @@ inline uint32x4_t vroundq_u32_f32(const float32x4_t val)

inline uint32x2_t vround_u32_f32(const float32x2_t val)
{
#if CAROTENE_NEON_ARCH >= 8 /* get ready for ARMv9 */
#if defined(__ARM_ARCH) && (__ARM_ARCH >= 8)
return vcvtn_u32_f32(val);
#else
const float32x2_t delta = vdup_n_f32(CAROTENE_ROUND_DELTA);
Expand All@@ -77,7 +77,7 @@ inline uint32x2_t vround_u32_f32(const float32x2_t val)

inline int32x4_t vroundq_s32_f32(const float32x4_t val)
{
#if CAROTENE_NEON_ARCH >= 8 /* get ready for ARMv9 */
#if defined(__ARM_ARCH) && (__ARM_ARCH >= 8)
return vcvtnq_s32_f32(val);
#else
const float32x4_t delta = vdupq_n_f32(CAROTENE_ROUND_DELTA);
Expand All@@ -87,7 +87,7 @@ inline int32x4_t vroundq_s32_f32(const float32x4_t val)

inline int32x2_t vround_s32_f32(const float32x2_t val)
{
#if CAROTENE_NEON_ARCH >= 8 /* get ready for ARMv9 */
#if defined(__ARM_ARCH) && (__ARM_ARCH >= 8)
return vcvtn_s32_f32(val);
#else
const float32x2_t delta = vdup_n_f32(CAROTENE_ROUND_DELTA);
Expand Down
33 changes: 33 additions & 0 deletions3rdparty/fastcv/CMakeLists.txt
View file
Open in desktop
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
# FastCV HAL: builds the static library `fastcv_hal` from the sources in
# src/*.cpp and links it against the prebuilt FastCV shared library.
#
# Inputs read from the including scope:
#   FCV_ENABLE                  - build the HAL only when true
#   FCV_LIB_DIR                 - directory containing libfastcvopt.so
#   FCV_HEADER_DIR              - directory with the FastCV headers
#   3P_LIBRARY_OUTPUT_PATH      - output directory for the static archive
#   OPENCV_3P_LIB_INSTALL_PATH  - install destination (static builds only)
#
# Cache variables published (INTERNAL):
#   FASTCV_HAL_VERSION, FASTCV_HAL_LIBRARIES, FASTCV_HAL_INCLUDE_DIRS
if(FCV_ENABLE)
  set(OPENCV_3P_FASTCV_DIR "${CMAKE_CURRENT_SOURCE_DIR}")

  # Prebuilt FastCV shared library; defined once and reused when linking below
  # so the path cannot drift between the variable and the link line.
  set(FASTCV_LIB "${FCV_LIB_DIR}/libfastcvopt.so")

  set(FASTCV_HAL_VERSION 0.0.1 CACHE INTERNAL "")
  set(FASTCV_HAL_LIBRARIES "fastcv_hal" CACHE INTERNAL "")
  set(FASTCV_HAL_INCLUDE_DIRS
    "${OPENCV_3P_FASTCV_DIR}/include"
    ${FCV_HEADER_DIR}
    CACHE INTERNAL "")

  file(GLOB FASTCV_HAL_HEADERS "${OPENCV_3P_FASTCV_DIR}/include/*.hpp")
  file(GLOB FASTCV_HAL_FILES "${OPENCV_3P_FASTCV_DIR}/src/*.cpp")
  add_library(fastcv_hal STATIC "${FASTCV_HAL_FILES}")

  # PROJECT_SOURCE_DIR (root of the enclosing project) is used instead of
  # CMAKE_SOURCE_DIR so the module include paths stay valid when the whole
  # tree is consumed through add_subdirectory() from another project.
  target_include_directories(fastcv_hal PRIVATE
    "${PROJECT_SOURCE_DIR}/modules/core/include"
    "${PROJECT_SOURCE_DIR}/modules/imgproc/include"
    ${FASTCV_HAL_INCLUDE_DIRS})

  target_link_libraries(fastcv_hal
    PUBLIC "${FASTCV_LIB}")

  set_target_properties(fastcv_hal PROPERTIES
    ARCHIVE_OUTPUT_DIRECTORY "${3P_LIBRARY_OUTPUT_PATH}")

  # The archive has to be installed only when the final libraries are static.
  if(NOT BUILD_SHARED_LIBS)
    ocv_install_target(fastcv_hal EXPORT OpenCVModules ARCHIVE DESTINATION ${OPENCV_3P_LIB_INSTALL_PATH} COMPONENT dev)
  endif()

else()
  message(STATUS "FastCV is not available, disabling related HAL and stuff")
endif()
35 changes: 35 additions & 0 deletions3rdparty/fastcv/fastcv.cmake
View file
Open in desktop
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
# download_fastcv(<root_dir>)
#
# Downloads the prebuilt FastCV package for the current platform from the
# opencv_3rdparty repository (pinned by FASTCV_COMMIT) and unpacks it into
# <root_dir> via ocv_download().
#
# Packages are selected only for Android (AArch64 or 32-bit ARM). On any other
# platform no package name/hash is chosen here, so the function returns
# without attempting a download instead of calling ocv_download() with empty
# FILENAME/HASH arguments.
#
# A failed download is reported with a message; it is not a fatal error.
function(download_fastcv root_dir)

  # Commit SHA in the opencv_3rdparty repo
  set(FASTCV_COMMIT "65f40fc8f7a6aac44936ae9538e69edede6c4b15")

  # Define actual FCV versions
  if(ANDROID)
    if(AARCH64)
      set(FCV_PACKAGE_NAME "fastcv_android_aarch64_2024_10_24.tgz")
      set(FCV_PACKAGE_HASH "8a259eea80064643bad20f72ba0b6066")
    else()
      set(FCV_PACKAGE_NAME "fastcv_android_arm32_2024_10_24.tgz")
      set(FCV_PACKAGE_HASH "04d89219c44d54166b2b7f8c0ed5143b")
    endif()
  elseif(UNIX AND NOT APPLE AND NOT IOS AND NOT XROS)
    message("FastCV: fastcv lib for Linux is not supported for now!")
  endif()

  # Nothing to fetch when no package was selected for this platform.
  if(NOT FCV_PACKAGE_NAME OR NOT FCV_PACKAGE_HASH)
    return()
  endif()

  # Download Package
  set(OPENCV_FASTCV_URL "https://raw.githubusercontent.com/opencv/opencv_3rdparty/${FASTCV_COMMIT}/fastcv/")

  ocv_download(FILENAME "${FCV_PACKAGE_NAME}"
               HASH "${FCV_PACKAGE_HASH}"
               URL "${OPENCV_FASTCV_URL}"
               DESTINATION_DIR "${root_dir}"
               ID FASTCV
               STATUS res
               UNPACK
               RELATIVE_URL)

  if(NOT res)
    message("FastCV: package download failed! Please download FastCV manually and put it at ${root_dir}.")
  endif()

endfunction()
Loading

[8]ページ先頭

©2009-2025 Movatter.jp