Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 8ecfbdb

Browse files
Merge pull request #19883 from jondea:arm-neon-optimised-color-lab-3.4
* Add Neon optimised RGB2Lab conversion
* Fix compile errors, change lambda to macro
* Change NEON optimised RGB2Lab to just use HAL
* Change [] to v_extract_n in RGB2Lab
* RGB2LAB Code quality, change to nlane agnostic
* Change RGB2Lab to use function rather than macro
* Remove whitespace

Co-authored-by: Francesco Petrogalli <25690309+fpetrogalli@users.noreply.github.com>
1 parent 63256a0, commit 8ecfbdb

File tree

1 file changed

+104
-0
lines changed

1 file changed

+104
-0
lines changed

‎modules/imgproc/src/color_lab.cpp‎

Lines changed: 104 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1536,6 +1536,8 @@ static inline void trilinearPackedInterpolate(const v_uint16& inX, const v_uint1
15361536
#endif// CV_SIMD
15371537

15381538

1539+
1540+
15391541
structRGB2Lab_b
15401542
{
15411543
typedef uchar channel_type;
@@ -1571,6 +1573,69 @@ struct RGB2Lab_b
15711573
}
15721574
}
15731575

1576+
#if CV_NEON
1577+
template<int n>
1578+
inlinevoidrgb2lab_batch(const ushort* tab,
1579+
const v_uint8 vRi,const v_uint8 vGi,const v_uint8 vBi,
1580+
v_int32& vL, v_int32& va, v_int32& vb)const
1581+
{
1582+
// Define some scalar constants which we will make use of later
1583+
constint Lscale = (116*255+50)/100;
1584+
constint Lshift = -((16*255*(1 << lab_shift2) +50)/100);
1585+
constint xyzDescaleShift = (1 << (lab_shift -1));
1586+
constint labDescaleShift = (1 << (lab_shift2 -1));
1587+
constint abShift =128*(1 << lab_shift2);
1588+
1589+
constint C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2],
1590+
C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5],
1591+
C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8];
1592+
1593+
// int R = tab[src[0]], G = tab[src[1]], B = tab[src[2]];
1594+
v_int32vR(tab[v_extract_n<4*n+0>(vRi)], tab[v_extract_n<4*n+1>(vRi)],
1595+
tab[v_extract_n<4*n+2>(vRi)], tab[v_extract_n<4*n+3>(vRi)]);
1596+
v_int32vG(tab[v_extract_n<4*n+0>(vGi)], tab[v_extract_n<4*n+1>(vGi)],
1597+
tab[v_extract_n<4*n+2>(vGi)], tab[v_extract_n<4*n+3>(vGi)]);
1598+
v_int32vB(tab[v_extract_n<4*n+0>(vBi)], tab[v_extract_n<4*n+1>(vBi)],
1599+
tab[v_extract_n<4*n+2>(vBi)], tab[v_extract_n<4*n+3>(vBi)]);
1600+
1601+
/* int fX = LabCbrtTab_b[CV_DESCALE(R*C0 + G*C1 + B*C2, lab_shift)];*/
1602+
v_int32 vfX =v_fma(vR,v_setall_s32(C0),v_setall_s32(xyzDescaleShift));
1603+
vfX =v_fma(vG,v_setall_s32(C1), vfX);
1604+
vfX =v_fma(vB,v_setall_s32(C2), vfX);
1605+
vfX = v_shr<lab_shift>(vfX);
1606+
vfX =v_int32(LabCbrtTab_b[v_extract_n<0>(vfX)], LabCbrtTab_b[v_extract_n<1>(vfX)],
1607+
LabCbrtTab_b[v_extract_n<2>(vfX)], LabCbrtTab_b[v_extract_n<3>(vfX)]);
1608+
1609+
/* int fY = LabCbrtTab_b[CV_DESCALE(R*C3 + G*C4 + B*C5, lab_shift)];*/
1610+
v_int32 vfY =v_fma(vR,v_setall_s32(C3),v_setall_s32(xyzDescaleShift));
1611+
vfY =v_fma(vG,v_setall_s32(C4), vfY);
1612+
vfY =v_fma(vB,v_setall_s32(C5), vfY);
1613+
vfY = v_shr<lab_shift>(vfY);
1614+
vfY =v_int32(LabCbrtTab_b[v_extract_n<0>(vfY)], LabCbrtTab_b[v_extract_n<1>(vfY)],
1615+
LabCbrtTab_b[v_extract_n<2>(vfY)], LabCbrtTab_b[v_extract_n<3>(vfY)]);
1616+
1617+
/* int fZ = LabCbrtTab_b[CV_DESCALE(R*C6 + G*C7 + B*C8, lab_shift)];*/
1618+
v_int32 vfZ =v_fma(vR,v_setall_s32(C6),v_setall_s32(xyzDescaleShift));
1619+
vfZ =v_fma(vG,v_setall_s32(C7), vfZ);
1620+
vfZ =v_fma(vB,v_setall_s32(C8), vfZ);
1621+
vfZ = v_shr<lab_shift>(vfZ);
1622+
vfZ =v_int32(LabCbrtTab_b[v_extract_n<0>(vfZ)], LabCbrtTab_b[v_extract_n<1>(vfZ)],
1623+
LabCbrtTab_b[v_extract_n<2>(vfZ)], LabCbrtTab_b[v_extract_n<3>(vfZ)]);
1624+
1625+
/* int L = CV_DESCALE( Lscale*fY + Lshift, lab_shift2 );*/
1626+
vL =v_fma(vfY,v_setall_s32(Lscale),v_setall_s32(Lshift+labDescaleShift));
1627+
vL = v_shr<lab_shift2>(vL);
1628+
1629+
/* int a = CV_DESCALE( 500*(fX - fY) + 128*(1 << lab_shift2), lab_shift2 );*/
1630+
va =v_fma(vfX - vfY,v_setall_s32(500),v_setall_s32(abShift+labDescaleShift));
1631+
va = v_shr<lab_shift2>(va);
1632+
1633+
/* int b = CV_DESCALE( 200*(fY - fZ) + 128*(1 << lab_shift2), lab_shift2 );*/
1634+
vb =v_fma(vfY - vfZ,v_setall_s32(200),v_setall_s32(abShift+labDescaleShift));
1635+
vb = v_shr<lab_shift2>(vb);
1636+
}
1637+
#endif// CV_NEON
1638+
15741639
voidoperator()(const uchar* src, uchar* dst,int n)const
15751640
{
15761641
CV_INSTRUMENT_REGION();
@@ -1585,6 +1650,45 @@ struct RGB2Lab_b
15851650

15861651
i =0;
15871652

1653+
#if CV_NEON
1654+
// On each loop, we load nlanes of RGB/A v_uint8s and store nlanes of
1655+
// Lab v_uint8s
1656+
for(; i <= n - v_uint8::nlanes; i += v_uint8::nlanes,
1657+
src += scn*v_uint8::nlanes, dst +=3*v_uint8::nlanes )
1658+
{
1659+
// Load 4 batches of 4 src
1660+
v_uint8 vRi, vGi, vBi;
1661+
if(scn ==4)
1662+
{
1663+
v_uint8 vAi;
1664+
v_load_deinterleave(src, vRi, vGi, vBi, vAi);
1665+
}
1666+
else// scn == 3
1667+
{
1668+
v_load_deinterleave(src, vRi, vGi, vBi);
1669+
}
1670+
1671+
// Do 4 batches of 4 RGB2Labs
1672+
v_int32 vL0, va0, vb0;
1673+
rgb2lab_batch<0>(tab, vRi, vGi, vBi, vL0, va0, vb0);
1674+
v_int32 vL1, va1, vb1;
1675+
rgb2lab_batch<1>(tab, vRi, vGi, vBi, vL1, va1, vb1);
1676+
v_int32 vL2, va2, vb2;
1677+
rgb2lab_batch<2>(tab, vRi, vGi, vBi, vL2, va2, vb2);
1678+
v_int32 vL3, va3, vb3;
1679+
rgb2lab_batch<3>(tab, vRi, vGi, vBi, vL3, va3, vb3);
1680+
1681+
// Saturate, combine and store all batches
1682+
// dst[0] = saturate_cast<uchar>(L);
1683+
// dst[1] = saturate_cast<uchar>(a);
1684+
// dst[2] = saturate_cast<uchar>(b);
1685+
v_store_interleave(dst,
1686+
v_pack(v_pack_u(vL0, vL1),v_pack_u(vL2, vL3)),
1687+
v_pack(v_pack_u(va0, va1),v_pack_u(va2, va3)),
1688+
v_pack(v_pack_u(vb0, vb1),v_pack_u(vb2, vb3)));
1689+
}
1690+
#endif// CV_NEON
1691+
15881692
#if CV_SIMD
15891693
constint vsize = v_uint8::nlanes;
15901694
constint xyzDescaleShift =1 << (lab_shift -1);

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp